github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/nomad/state/state_store.go

package state

import (
	"context"
	"errors"
	"fmt"
	"reflect"
	"sort"
	"strings"
	"time"

	"github.com/hashicorp/go-bexpr"
	"github.com/hashicorp/go-hclog"
	"github.com/hashicorp/go-memdb"
	"github.com/hashicorp/go-multierror"
	"github.com/hashicorp/nomad/helper/pointer"
	"github.com/hashicorp/nomad/nomad/stream"
	"github.com/hashicorp/nomad/nomad/structs"
)

// Txn is a transaction against a state store.
// This can be a read or write transaction.
type Txn = *txn

// SortOption represents how results can be sorted.
type SortOption bool

const (
	// SortDefault indicates that the result should be returned using the
	// default go-memdb ResultIterator order.
	SortDefault SortOption = false

	// SortReverse indicates that the result should be returned using the
	// reversed go-memdb ResultIterator order.
	SortReverse SortOption = true
)

const (
	// NodeEligibilityEventPlanRejectThreshold is the message used when the node
	// is set to ineligible due to multiple plan failures.
	// This is a preventive measure to signal scheduler workers to not consider
	// the node for future placements.
	// Plan rejections for a node are expected due to the optimistic and
	// concurrent nature of the scheduling process, but repeated failures for
	// the same node may indicate an underlying issue not detected by Nomad.
	// The plan applier keeps track of plan rejection history and will mark
	// nodes as ineligible if they cross a given threshold.
	NodeEligibilityEventPlanRejectThreshold = "Node marked as ineligible for scheduling due to multiple plan rejections, refer to https://www.nomadproject.io/s/port-plan-failure for more information"

	// NodeRegisterEventRegistered is the message used when the node becomes
	// registered.
	NodeRegisterEventRegistered = "Node registered"

	// NodeRegisterEventReregistered is the message used when the node becomes
	// re-registered.
	NodeRegisterEventReregistered = "Node re-registered"
)

// terminate appends the go-memdb terminator character to s.
//
// We can then use the result for exact matches during prefix
// scans over compound indexes that start with s.
func terminate(s string) string {
	return s + "\x00"
}

// IndexEntry is used with the "index" table
// for managing the latest Raft index affecting a table.
type IndexEntry struct {
	Key   string
	Value uint64
}

// StateStoreConfig is used to configure a new state store
type StateStoreConfig struct {
	// Logger is used to output the state store's logs
	Logger hclog.Logger

	// Region is the region of the server embedding the state store.
	Region string

	// EnablePublisher is used to enable or disable the event publisher
	EnablePublisher bool

	// EventBufferSize configures the amount of events to hold in memory
	EventBufferSize int64
}

// The StateStore is responsible for maintaining all the Nomad
// state. It is manipulated by the FSM which maintains consistency
// through the use of Raft. The goals of the StateStore are to provide
// high concurrency for read operations without blocking writes, and
// to provide write availability in the face of reads. EVERY object
// returned as a result of a read against the state store should be
// considered a constant and NEVER modified in place.
type StateStore struct {
	logger hclog.Logger
	db     *changeTrackerDB

	// config is the passed in configuration
	config *StateStoreConfig

	// abandonCh is used to signal watchers that this state store has been
	// abandoned (usually during a restore). This is only ever closed.
	abandonCh chan struct{}

	// TODO: refactor abandonCh to use a context so that both can use the same
	// cancel mechanism.
	stopEventBroker func()
}

type streamACLDelegate struct {
	s *StateStore
}

func (a *streamACLDelegate) TokenProvider() stream.ACLTokenProvider {
	resolver, _ := a.s.Snapshot()
	return resolver
}

// NewStateStore is used to create a new state store
func NewStateStore(config *StateStoreConfig) (*StateStore, error) {
	// Create the MemDB
	db, err := memdb.NewMemDB(stateStoreSchema())
	if err != nil {
		return nil, fmt.Errorf("state store setup failed: %v", err)
	}

	// Create the state store
	ctx, cancel := context.WithCancel(context.TODO())
	s := &StateStore{
		logger:          config.Logger.Named("state_store"),
		config:          config,
		abandonCh:       make(chan struct{}),
		stopEventBroker: cancel,
	}

	if config.EnablePublisher {
		// Create new event publisher using provided config
		broker, err := stream.NewEventBroker(ctx, &streamACLDelegate{s}, stream.EventBrokerCfg{
			EventBufferSize: config.EventBufferSize,
			Logger:          config.Logger,
		})
		if err != nil {
			return nil, fmt.Errorf("creating state store event broker: %w", err)
		}
		s.db = NewChangeTrackerDB(db, broker, eventsFromChanges)
	} else {
		s.db = NewChangeTrackerDB(db, nil, noOpProcessChanges)
	}

	// Initialize the state store with the default namespace.
	if err := s.namespaceInit(); err != nil {
		return nil, fmt.Errorf("enterprise state store initialization failed: %v", err)
	}

	return s, nil
}

// NewWatchSet returns a new memdb.WatchSet that adds the state store's abandonCh
// as a watcher. This is important in that it will notify when this specific
// state store is no longer valid, usually due to a new snapshot being loaded
func (s *StateStore) NewWatchSet() memdb.WatchSet {
	ws := memdb.NewWatchSet()
	ws.Add(s.AbandonCh())
	return ws
}

func (s *StateStore) EventBroker() (*stream.EventBroker, error) {
	if s.db.publisher == nil {
		return nil, fmt.Errorf("EventBroker not configured")
	}
	return s.db.publisher, nil
}

// namespaceInit ensures the default namespace exists.
func (s *StateStore) namespaceInit() error {
	// Create the default namespace. This is safe to do every time we create the
	// state store. There are two main cases: a brand new cluster, in which case
	// each server will have the same default namespace object, or an existing
	// cluster, in which case if the default namespace has been modified, it
	// will be overridden by the restore code path.
	defaultNs := &structs.Namespace{
		Name:        structs.DefaultNamespace,
		Description: structs.DefaultNamespaceDescription,
	}

	if err := s.UpsertNamespaces(1, []*structs.Namespace{defaultNs}); err != nil {
		return fmt.Errorf("inserting default namespace failed: %v", err)
	}

	return nil
}

// Config returns the state store configuration.
func (s *StateStore) Config() *StateStoreConfig {
	return s.config
}
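
// exampleNewStateStore is an illustrative sketch added in editing; it is not
// part of the original file. It shows how a caller might construct a state
// store with the event publisher enabled. The logger, region, and buffer size
// are arbitrary placeholder values.
func exampleNewStateStore() (*StateStore, error) {
	return NewStateStore(&StateStoreConfig{
		Logger:          hclog.Default(),
		Region:          "global",
		EnablePublisher: true,
		EventBufferSize: 100,
	})
}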

// Snapshot is used to create a point in time snapshot. Because
// we use MemDB, we just need to snapshot the state of the underlying
// database.
func (s *StateStore) Snapshot() (*StateSnapshot, error) {
	memDBSnap := s.db.memdb.Snapshot()

	store := StateStore{
		logger: s.logger,
		config: s.config,
	}

	// Create a new change tracker DB that does not publish or track changes
	store.db = NewChangeTrackerDB(memDBSnap, nil, noOpProcessChanges)

	snap := &StateSnapshot{
		StateStore: store,
	}
	return snap, nil
}

// SnapshotMinIndex is used to create a state snapshot where the index is
// guaranteed to be greater than or equal to the index parameter.
//
// Some server operations (such as scheduling) exchange objects via RPC
// concurrent with Raft log application, so they must ensure the state store
// snapshot they are operating on is at or after the index the objects
// retrieved via RPC were applied to the Raft log at.
//
// Callers should maintain their own timer metric as the time this method
// blocks indicates Raft log application latency relative to scheduling.
func (s *StateStore) SnapshotMinIndex(ctx context.Context, index uint64) (*StateSnapshot, error) {
	// Ported from work.go:waitForIndex prior to 0.9

	const backoffBase = 20 * time.Millisecond
	const backoffLimit = 1 * time.Second
	var retries uint
	var retryTimer *time.Timer

	// XXX: Potential optimization is to set up a watch on the state
	// store's index table and only unblock via a trigger rather than
	// polling.
	for {
		// Get the state store's current index
		snapshotIndex, err := s.LatestIndex()
		if err != nil {
			return nil, fmt.Errorf("failed to determine state store's index: %v", err)
		}

		// We only need the FSM state to be as recent as the given index
		if snapshotIndex >= index {
			return s.Snapshot()
		}

		// Exponential back off
		retries++
		if retryTimer == nil {
			// First retry, start at baseline
			retryTimer = time.NewTimer(backoffBase)
		} else {
			// Subsequent retry, reset timer
			deadline := 1 << (2 * retries) * backoffBase
			if deadline > backoffLimit {
				deadline = backoffLimit
			}
			retryTimer.Reset(deadline)
		}

		select {
		case <-ctx.Done():
			return nil, ctx.Err()
		case <-retryTimer.C:
		}
	}
}

// Restore is used to optimize the efficiency of rebuilding
// state by minimizing the number of transactions and checking
// overhead.
func (s *StateStore) Restore() (*StateRestore, error) {
	txn := s.db.WriteTxnRestore()
	r := &StateRestore{
		txn: txn,
	}
	return r, nil
}

// AbandonCh returns a channel you can wait on to know if the state store was
// abandoned.
func (s *StateStore) AbandonCh() <-chan struct{} {
	return s.abandonCh
}

// Abandon is used to signal that the given state store has been abandoned.
// Calling this more than one time will panic.
func (s *StateStore) Abandon() {
	s.StopEventBroker()
	close(s.abandonCh)
}

// StopEventBroker calls the cancel func for the state store's event
// publisher. It should be called during server shutdown.
func (s *StateStore) StopEventBroker() {
	s.stopEventBroker()
}
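
// exampleSnapshotAtIndex is an illustrative sketch added in editing; it is not
// part of the original file. It shows how a caller might bound the wait for
// the state store to catch up to a known Raft index, here with an arbitrary
// five second timeout.
func exampleSnapshotAtIndex(s *StateStore, index uint64) (*StateSnapshot, error) {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	return s.SnapshotMinIndex(ctx, index)
}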

// QueryFn is the definition of a function that can be used to implement a basic
// blocking query against the state store.
type QueryFn func(memdb.WatchSet, *StateStore) (resp interface{}, index uint64, err error)

// BlockingQuery takes a query function and runs the function until the minimum
// query index is met or until the passed context is cancelled.
func (s *StateStore) BlockingQuery(query QueryFn, minIndex uint64, ctx context.Context) (
	resp interface{}, index uint64, err error) {

RUN_QUERY:
	// We capture the state store and its abandon channel but pass a snapshot to
	// the blocking query function. We operate on the snapshot to allow separate
	// calls to the state store not all wrapped within the same transaction.
	abandonCh := s.AbandonCh()
	snap, _ := s.Snapshot()
	stateSnap := &snap.StateStore

	// We can skip all watch tracking if this isn't a blocking query.
	var ws memdb.WatchSet
	if minIndex > 0 {
		ws = memdb.NewWatchSet()

		// This channel will be closed if a snapshot is restored and the
		// whole state store is abandoned.
		ws.Add(abandonCh)
	}

	resp, index, err = query(ws, stateSnap)
	if err != nil {
		return nil, index, err
	}

	// We haven't reached the min-index yet.
	if minIndex > 0 && index <= minIndex {
		if err := ws.WatchCtx(ctx); err != nil {
			return nil, index, err
		}

		goto RUN_QUERY
	}

	return resp, index, nil
}
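
// exampleBlockingNodeQuery is an illustrative sketch added in editing; it is
// not part of the original file. It shows one way a QueryFn might be written:
// the query reads a node by a hypothetical ID and reports the latest index so
// BlockingQuery can decide whether to block and retry.
func exampleBlockingNodeQuery(ctx context.Context, s *StateStore, minIndex uint64) (interface{}, uint64, error) {
	query := func(ws memdb.WatchSet, store *StateStore) (interface{}, uint64, error) {
		node, err := store.NodeByID(ws, "example-node-id")
		if err != nil {
			return nil, 0, err
		}
		index, err := store.LatestIndex()
		if err != nil {
			return nil, 0, err
		}
		return node, index, nil
	}
	return s.BlockingQuery(query, minIndex, ctx)
}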

// UpsertPlanResults is used to upsert the results of a plan.
func (s *StateStore) UpsertPlanResults(msgType structs.MessageType, index uint64, results *structs.ApplyPlanResultsRequest) error {
	snapshot, err := s.Snapshot()
	if err != nil {
		return err
	}

	allocsStopped, err := snapshot.DenormalizeAllocationDiffSlice(results.AllocsStopped)
	if err != nil {
		return err
	}

	allocsPreempted, err := snapshot.DenormalizeAllocationDiffSlice(results.AllocsPreempted)
	if err != nil {
		return err
	}

	// COMPAT 0.11: Remove this denormalization when NodePreemptions is removed
	results.NodePreemptions, err = snapshot.DenormalizeAllocationSlice(results.NodePreemptions)
	if err != nil {
		return err
	}

	txn := s.db.WriteTxnMsgT(msgType, index)
	defer txn.Abort()

	// Mark nodes as ineligible.
	for _, nodeID := range results.IneligibleNodes {
		s.logger.Warn("marking node as ineligible due to multiple plan rejections, refer to https://www.nomadproject.io/s/port-plan-failure for more information", "node_id", nodeID)

		nodeEvent := structs.NewNodeEvent().
			SetSubsystem(structs.NodeEventSubsystemScheduler).
			SetMessage(NodeEligibilityEventPlanRejectThreshold)

		err := s.updateNodeEligibilityImpl(index, nodeID,
			structs.NodeSchedulingIneligible, results.UpdatedAt, nodeEvent, txn)
		if err != nil {
			return err
		}
	}

	// Upsert the newly created or updated deployment
	if results.Deployment != nil {
		if err := s.upsertDeploymentImpl(index, results.Deployment, txn); err != nil {
			return err
		}
	}

	// Update the status of deployments affected by the plan.
	if len(results.DeploymentUpdates) != 0 {
		if err := s.upsertDeploymentUpdates(index, results.DeploymentUpdates, txn); err != nil {
			return err
		}
	}

	if results.EvalID != "" {
		// Update the modify index of the eval id
		if err := s.updateEvalModifyIndex(txn, index, results.EvalID); err != nil {
			return err
		}
	}

	numAllocs := 0
	if len(results.Alloc) > 0 || len(results.NodePreemptions) > 0 {
		// COMPAT 0.11: This branch will be removed, when Alloc is removed
		// Attach the job to all the allocations. It is pulled out in the payload to
		// avoid the redundancy of encoding, but should be denormalized prior to
		// being inserted into MemDB.
		addComputedAllocAttrs(results.Alloc, results.Job)
		numAllocs = len(results.Alloc) + len(results.NodePreemptions)
	} else {
		// Attach the job to all the allocations. It is pulled out in the payload to
		// avoid the redundancy of encoding, but should be denormalized prior to
		// being inserted into MemDB.
		addComputedAllocAttrs(results.AllocsUpdated, results.Job)
		numAllocs = len(allocsStopped) + len(results.AllocsUpdated) + len(allocsPreempted)
	}

	allocsToUpsert := make([]*structs.Allocation, 0, numAllocs)

	// COMPAT 0.11: Both these appends should be removed when Alloc and NodePreemptions are removed
	allocsToUpsert = append(allocsToUpsert, results.Alloc...)
	allocsToUpsert = append(allocsToUpsert, results.NodePreemptions...)

	allocsToUpsert = append(allocsToUpsert, allocsStopped...)
	allocsToUpsert = append(allocsToUpsert, results.AllocsUpdated...)
	allocsToUpsert = append(allocsToUpsert, allocsPreempted...)

	// handle upgrade path
	for _, alloc := range allocsToUpsert {
		alloc.Canonicalize()
	}

	if err := s.upsertAllocsImpl(index, allocsToUpsert, txn); err != nil {
		return err
	}

	// Upsert followup evals for allocs that were preempted
	for _, eval := range results.PreemptionEvals {
		if err := s.nestedUpsertEval(txn, index, eval); err != nil {
			return err
		}
	}

	return txn.Commit()
}

// addComputedAllocAttrs adds the computed/derived attributes to the allocation.
// This method is used when an allocation is being denormalized.
func addComputedAllocAttrs(allocs []*structs.Allocation, job *structs.Job) {
	structs.DenormalizeAllocationJobs(job, allocs)

	// COMPAT(0.11): Remove in 0.11
	// Calculate the total resources of allocations. It is pulled out in the
	// payload to avoid encoding something that can be computed, but should be
	// denormalized prior to being inserted into MemDB.
	for _, alloc := range allocs {
		if alloc.Resources != nil {
			continue
		}

		alloc.Resources = new(structs.Resources)
		for _, task := range alloc.TaskResources {
			alloc.Resources.Add(task)
		}

		// Add the shared resources
		alloc.Resources.Add(alloc.SharedResources)
	}
}

// upsertDeploymentUpdates updates the deployments given the passed status
// updates.
func (s *StateStore) upsertDeploymentUpdates(index uint64, updates []*structs.DeploymentStatusUpdate, txn *txn) error {
	for _, u := range updates {
		if err := s.updateDeploymentStatusImpl(index, u, txn); err != nil {
			return err
		}
	}

	return nil
}

// UpsertJobSummary upserts a job summary into the state store.
func (s *StateStore) UpsertJobSummary(index uint64, jobSummary *structs.JobSummary) error {
	txn := s.db.WriteTxn(index)
	defer txn.Abort()

	// Check if the job summary already exists
	existing, err := txn.First("job_summary", "id", jobSummary.Namespace, jobSummary.JobID)
	if err != nil {
		return fmt.Errorf("job summary lookup failed: %v", err)
	}

	// Setup the indexes correctly
	if existing != nil {
		jobSummary.CreateIndex = existing.(*structs.JobSummary).CreateIndex
		jobSummary.ModifyIndex = index
	} else {
		jobSummary.CreateIndex = index
		jobSummary.ModifyIndex = index
	}

	// Update the index
	if err := txn.Insert("job_summary", jobSummary); err != nil {
		return err
	}

	// Update the indexes table for job summary
	if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}

	return txn.Commit()
}

// DeleteJobSummary deletes the job summary with the given ID. This is for
// testing purposes only.
func (s *StateStore) DeleteJobSummary(index uint64, namespace, id string) error {
	txn := s.db.WriteTxn(index)
	defer txn.Abort()

	// Delete the job summary
	if _, err := txn.DeleteAll("job_summary", "id", namespace, id); err != nil {
		return fmt.Errorf("deleting job summary failed: %v", err)
	}
	if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}
	return txn.Commit()
}

// UpsertDeployment is used to insert or update a new deployment.
func (s *StateStore) UpsertDeployment(index uint64, deployment *structs.Deployment) error {
	txn := s.db.WriteTxn(index)
	defer txn.Abort()
	if err := s.upsertDeploymentImpl(index, deployment, txn); err != nil {
		return err
	}
	return txn.Commit()
}

func (s *StateStore) upsertDeploymentImpl(index uint64, deployment *structs.Deployment, txn *txn) error {
	// Check if the deployment already exists
	existing, err := txn.First("deployment", "id", deployment.ID)
	if err != nil {
		return fmt.Errorf("deployment lookup failed: %v", err)
	}

	// Setup the indexes correctly
	if existing != nil {
		deployment.CreateIndex = existing.(*structs.Deployment).CreateIndex
		deployment.ModifyIndex = index
	} else {
		deployment.CreateIndex = index
		deployment.ModifyIndex = index
	}

	// Insert the deployment
	if err := txn.Insert("deployment", deployment); err != nil {
		return err
	}

	// Update the indexes table for deployment
	if err := txn.Insert("index", &IndexEntry{"deployment", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}

	// If the deployment is being marked as complete, set the job to stable.
	if deployment.Status == structs.DeploymentStatusSuccessful {
		if err := s.updateJobStabilityImpl(index, deployment.Namespace, deployment.JobID, deployment.JobVersion, true, txn); err != nil {
			return fmt.Errorf("failed to update job stability: %v", err)
		}
	}

	return nil
}

func (s *StateStore) Deployments(ws memdb.WatchSet, sort SortOption) (memdb.ResultIterator, error) {
	txn := s.db.ReadTxn()

	var it memdb.ResultIterator
	var err error

	switch sort {
	case SortReverse:
		it, err = txn.GetReverse("deployment", "create")
	default:
		it, err = txn.Get("deployment", "create")
	}

	if err != nil {
		return nil, err
	}

	ws.Add(it.WatchCh())

	return it, nil
}

func (s *StateStore) DeploymentsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) {
	txn := s.db.ReadTxn()

	// Walk the entire deployments table
	iter, err := txn.Get("deployment", "namespace", namespace)
	if err != nil {
		return nil, err
	}

	ws.Add(iter.WatchCh())
	return iter, nil
}

func (s *StateStore) DeploymentsByNamespaceOrdered(ws memdb.WatchSet, namespace string, sort SortOption) (memdb.ResultIterator, error) {
	txn := s.db.ReadTxn()

	var (
		it    memdb.ResultIterator
		err   error
		exact = terminate(namespace)
	)

	switch sort {
	case SortReverse:
		it, err = txn.GetReverse("deployment", "namespace_create_prefix", exact)
	default:
		it, err = txn.Get("deployment", "namespace_create_prefix", exact)
	}

	if err != nil {
		return nil, err
	}

	ws.Add(it.WatchCh())

	return it, nil
}

func (s *StateStore) DeploymentsByIDPrefix(ws memdb.WatchSet, namespace, deploymentID string, sort SortOption) (memdb.ResultIterator, error) {
	txn := s.db.ReadTxn()

	var iter memdb.ResultIterator
	var err error

	// Walk the entire deployments table
	switch sort {
	case SortReverse:
		iter, err = txn.GetReverse("deployment", "id_prefix", deploymentID)
	default:
		iter, err = txn.Get("deployment", "id_prefix", deploymentID)
	}
	if err != nil {
		return nil, err
	}

	ws.Add(iter.WatchCh())

	// Wrap the iterator in a filter
	wrap := memdb.NewFilterIterator(iter, deploymentNamespaceFilter(namespace))
	return wrap, nil
}
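
// exampleRecentDeployments is an illustrative sketch added in editing; it is
// not part of the original file. It shows how the SortReverse option can be
// used to walk deployments from newest to oldest create index.
func exampleRecentDeployments(s *StateStore) ([]*structs.Deployment, error) {
	ws := memdb.NewWatchSet()
	iter, err := s.Deployments(ws, SortReverse)
	if err != nil {
		return nil, err
	}
	var out []*structs.Deployment
	for raw := iter.Next(); raw != nil; raw = iter.Next() {
		out = append(out, raw.(*structs.Deployment))
	}
	return out, nil
}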

// deploymentNamespaceFilter returns a filter function that filters all
// deployments not in the given namespace.
func deploymentNamespaceFilter(namespace string) func(interface{}) bool {
	return func(raw interface{}) bool {
		d, ok := raw.(*structs.Deployment)
		if !ok {
			return true
		}

		return d.Namespace != namespace
	}
}

func (s *StateStore) DeploymentByID(ws memdb.WatchSet, deploymentID string) (*structs.Deployment, error) {
	txn := s.db.ReadTxn()
	return s.deploymentByIDImpl(ws, deploymentID, txn)
}

func (s *StateStore) deploymentByIDImpl(ws memdb.WatchSet, deploymentID string, txn *txn) (*structs.Deployment, error) {
	watchCh, existing, err := txn.FirstWatch("deployment", "id", deploymentID)
	if err != nil {
		return nil, fmt.Errorf("deployment lookup failed: %v", err)
	}
	ws.Add(watchCh)

	if existing != nil {
		return existing.(*structs.Deployment), nil
	}

	return nil, nil
}

func (s *StateStore) DeploymentsByJobID(ws memdb.WatchSet, namespace, jobID string, all bool) ([]*structs.Deployment, error) {
	txn := s.db.ReadTxn()

	var job *structs.Job
	// Read job from state store
	_, existing, err := txn.FirstWatch("jobs", "id", namespace, jobID)
	if err != nil {
		return nil, fmt.Errorf("job lookup failed: %v", err)
	}
	if existing != nil {
		job = existing.(*structs.Job)
	}

	// Get an iterator over the deployments
	iter, err := txn.Get("deployment", "job", namespace, jobID)
	if err != nil {
		return nil, err
	}

	ws.Add(iter.WatchCh())

	var out []*structs.Deployment
	for {
		raw := iter.Next()
		if raw == nil {
			break
		}
		d := raw.(*structs.Deployment)

		// If the deployment belongs to a job with the same ID but a different
		// create index, and we are not fetching all deployments matching the
		// job ID, skip it.
		if !all && job != nil && d.JobCreateIndex != job.CreateIndex {
			continue
		}
		out = append(out, d)
	}

	return out, nil
}
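
// exampleLatestDeployment is an illustrative sketch added in editing; it is
// not part of the original file. It fetches the most recent deployment for a
// hypothetical job ID "web" in the default namespace.
func exampleLatestDeployment(s *StateStore) (*structs.Deployment, error) {
	ws := memdb.NewWatchSet()
	return s.LatestDeploymentByJobID(ws, structs.DefaultNamespace, "web")
}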

// LatestDeploymentByJobID returns the latest deployment for the given job. The
// latest is determined strictly by CreateIndex.
func (s *StateStore) LatestDeploymentByJobID(ws memdb.WatchSet, namespace, jobID string) (*structs.Deployment, error) {
	txn := s.db.ReadTxn()

	// Get an iterator over the deployments
	iter, err := txn.Get("deployment", "job", namespace, jobID)
	if err != nil {
		return nil, err
	}

	ws.Add(iter.WatchCh())

	var out *structs.Deployment
	for {
		raw := iter.Next()
		if raw == nil {
			break
		}

		d := raw.(*structs.Deployment)
		if out == nil || out.CreateIndex < d.CreateIndex {
			out = d
		}
	}

	return out, nil
}

// DeleteDeployment is used to delete a set of deployments by ID
func (s *StateStore) DeleteDeployment(index uint64, deploymentIDs []string) error {
	txn := s.db.WriteTxn(index)
	defer txn.Abort()

	if len(deploymentIDs) == 0 {
		return nil
	}

	for _, deploymentID := range deploymentIDs {
		// Lookup the deployment
		existing, err := txn.First("deployment", "id", deploymentID)
		if err != nil {
			return fmt.Errorf("deployment lookup failed: %v", err)
		}
		if existing == nil {
			return fmt.Errorf("deployment not found")
		}

		// Delete the deployment
		if err := txn.Delete("deployment", existing); err != nil {
			return fmt.Errorf("deployment delete failed: %v", err)
		}
	}

	if err := txn.Insert("index", &IndexEntry{"deployment", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}

	return txn.Commit()
}

// UpsertScalingEvent is used to insert a new scaling event.
// Only the most recent JobTrackedScalingEvents will be kept.
func (s *StateStore) UpsertScalingEvent(index uint64, req *structs.ScalingEventRequest) error {
	txn := s.db.WriteTxn(index)
	defer txn.Abort()

	// Get the existing events
	existing, err := txn.First("scaling_event", "id", req.Namespace, req.JobID)
	if err != nil {
		return fmt.Errorf("scaling event lookup failed: %v", err)
	}

	var jobEvents *structs.JobScalingEvents
	if existing != nil {
		jobEvents = existing.(*structs.JobScalingEvents)
	} else {
		jobEvents = &structs.JobScalingEvents{
			Namespace:     req.Namespace,
			JobID:         req.JobID,
			ScalingEvents: make(map[string][]*structs.ScalingEvent),
		}
	}

	jobEvents.ModifyIndex = index
	req.ScalingEvent.CreateIndex = index

	events := jobEvents.ScalingEvents[req.TaskGroup]
	// Prepend this latest event
	events = append(
		[]*structs.ScalingEvent{req.ScalingEvent},
		events...,
	)
	// Truncate older events
	if len(events) > structs.JobTrackedScalingEvents {
		events = events[0:structs.JobTrackedScalingEvents]
	}
	jobEvents.ScalingEvents[req.TaskGroup] = events

	// Insert the new event
	if err := txn.Insert("scaling_event", jobEvents); err != nil {
		return fmt.Errorf("scaling event insert failed: %v", err)
	}

	// Update the indexes table for scaling_event
	if err := txn.Insert("index", &IndexEntry{"scaling_event", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}

	return txn.Commit()
}

// ScalingEvents returns an iterator over all the job scaling events
func (s *StateStore) ScalingEvents(ws memdb.WatchSet) (memdb.ResultIterator, error) {
	txn := s.db.ReadTxn()

	// Walk the entire scaling_event table
	iter, err := txn.Get("scaling_event", "id")
	if err != nil {
		return nil, err
	}

	ws.Add(iter.WatchCh())

	return iter, nil
}
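
// exampleScalingHistory is an illustrative sketch added in editing; it is not
// part of the original file. It reads the per-task-group scaling events kept
// for a hypothetical job ID "web" in the default namespace.
func exampleScalingHistory(s *StateStore) (map[string][]*structs.ScalingEvent, uint64, error) {
	ws := memdb.NewWatchSet()
	return s.ScalingEventsByJob(ws, structs.DefaultNamespace, "web")
}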

func (s *StateStore) ScalingEventsByJob(ws memdb.WatchSet, namespace, jobID string) (map[string][]*structs.ScalingEvent, uint64, error) {
	txn := s.db.ReadTxn()

	watchCh, existing, err := txn.FirstWatch("scaling_event", "id", namespace, jobID)
	if err != nil {
		return nil, 0, fmt.Errorf("job scaling events lookup failed: %v", err)
	}
	ws.Add(watchCh)

	if existing != nil {
		events := existing.(*structs.JobScalingEvents)
		return events.ScalingEvents, events.ModifyIndex, nil
	}
	return nil, 0, nil
}

// UpsertNode is used to register a node or update a node definition
// This is assumed to be triggered by the client, so we retain the value
// of drain/eligibility which is set by the scheduler.
func (s *StateStore) UpsertNode(msgType structs.MessageType, index uint64, node *structs.Node) error {
	txn := s.db.WriteTxnMsgT(msgType, index)
	defer txn.Abort()

	err := upsertNodeTxn(txn, index, node)
	if err != nil {
		return err
	}
	return txn.Commit()
}
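
// exampleRegisterNode is an illustrative sketch added in editing; it is not
// part of the original file. It registers a minimal node record; real
// registrations arrive through the FSM with a fully fingerprinted node, and
// structs.NodeRegisterRequestType is assumed to be the matching message type.
func exampleRegisterNode(s *StateStore, index uint64) error {
	node := &structs.Node{
		ID:     "11111111-2222-3333-4444-555555555555",
		Name:   "example-node",
		Status: structs.NodeStatusReady,
	}
	return s.UpsertNode(structs.NodeRegisterRequestType, index, node)
}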

func upsertNodeTxn(txn *txn, index uint64, node *structs.Node) error {
	// Check if the node already exists
	existing, err := txn.First("nodes", "id", node.ID)
	if err != nil {
		return fmt.Errorf("node lookup failed: %v", err)
	}

	// Setup the indexes correctly
	if existing != nil {
		exist := existing.(*structs.Node)
		node.CreateIndex = exist.CreateIndex
		node.ModifyIndex = index

		// Retain node events that have already been set on the node
		node.Events = exist.Events

		// If we are transitioning from down, record the re-registration
		if exist.Status == structs.NodeStatusDown && node.Status != structs.NodeStatusDown {
			appendNodeEvents(index, node, []*structs.NodeEvent{
				structs.NewNodeEvent().SetSubsystem(structs.NodeEventSubsystemCluster).
					SetMessage(NodeRegisterEventReregistered).
					SetTimestamp(time.Unix(node.StatusUpdatedAt, 0))})
		}

		node.SchedulingEligibility = exist.SchedulingEligibility // Retain the eligibility
		node.DrainStrategy = exist.DrainStrategy                 // Retain the drain strategy
		node.LastDrain = exist.LastDrain                         // Retain the drain metadata
	} else {
		// Because this is the first time the node is being registered, we should
		// also create a node registration event
		nodeEvent := structs.NewNodeEvent().SetSubsystem(structs.NodeEventSubsystemCluster).
			SetMessage(NodeRegisterEventRegistered).
			SetTimestamp(time.Unix(node.StatusUpdatedAt, 0))
		node.Events = []*structs.NodeEvent{nodeEvent}
		node.CreateIndex = index
		node.ModifyIndex = index
	}

	// Insert the node
	if err := txn.Insert("nodes", node); err != nil {
		return fmt.Errorf("node insert failed: %v", err)
	}
	if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}
	if err := upsertCSIPluginsForNode(txn, node, index); err != nil {
		return fmt.Errorf("csi plugin update failed: %v", err)
	}

	return nil
}

// DeleteNode deregisters a batch of nodes
func (s *StateStore) DeleteNode(msgType structs.MessageType, index uint64, nodes []string) error {
	txn := s.db.WriteTxn(index)
	defer txn.Abort()

	err := deleteNodeTxn(txn, index, nodes)
	if err != nil {
		return err
	}
	return txn.Commit()
}

func deleteNodeTxn(txn *txn, index uint64, nodes []string) error {
	if len(nodes) == 0 {
		return fmt.Errorf("node ids missing")
	}

	for _, nodeID := range nodes {
		existing, err := txn.First("nodes", "id", nodeID)
		if err != nil {
			return fmt.Errorf("node lookup failed: %s: %v", nodeID, err)
		}
		if existing == nil {
			return fmt.Errorf("node not found: %s", nodeID)
		}

		// Delete the node
		if err := txn.Delete("nodes", existing); err != nil {
			return fmt.Errorf("node delete failed: %s: %v", nodeID, err)
		}

		node := existing.(*structs.Node)
		if err := deleteNodeCSIPlugins(txn, node, index); err != nil {
			return fmt.Errorf("csi plugin delete failed: %v", err)
		}
	}

	if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}

	return nil
}

// UpdateNodeStatus is used to update the status of a node
func (s *StateStore) UpdateNodeStatus(msgType structs.MessageType, index uint64, nodeID, status string, updatedAt int64, event *structs.NodeEvent) error {
	txn := s.db.WriteTxnMsgT(msgType, index)
	defer txn.Abort()

	if err := s.updateNodeStatusTxn(txn, nodeID, status, updatedAt, event); err != nil {
		return err
	}

	return txn.Commit()
}

func (s *StateStore) updateNodeStatusTxn(txn *txn, nodeID, status string, updatedAt int64, event *structs.NodeEvent) error {

	// Lookup the node
	existing, err := txn.First("nodes", "id", nodeID)
	if err != nil {
		return fmt.Errorf("node lookup failed: %v", err)
	}
	if existing == nil {
		return fmt.Errorf("node not found")
	}

	// Copy the existing node
	existingNode := existing.(*structs.Node)
	copyNode := existingNode.Copy()
	copyNode.StatusUpdatedAt = updatedAt

	// Add the event if given
	if event != nil {
		appendNodeEvents(txn.Index, copyNode, []*structs.NodeEvent{event})
	}

	// Update the status in the copy
	copyNode.Status = status
	copyNode.ModifyIndex = txn.Index

	// Insert the node
	if err := txn.Insert("nodes", copyNode); err != nil {
		return fmt.Errorf("node update failed: %v", err)
	}
	if err := txn.Insert("index", &IndexEntry{"nodes", txn.Index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}
	return nil
}

// BatchUpdateNodeDrain is used to update the drain of a set of nodes.
// This is currently only called when node drain is completed by the drainer.
func (s *StateStore) BatchUpdateNodeDrain(msgType structs.MessageType, index uint64, updatedAt int64,
	updates map[string]*structs.DrainUpdate, events map[string]*structs.NodeEvent) error {
	txn := s.db.WriteTxnMsgT(msgType, index)
	defer txn.Abort()
	for node, update := range updates {
		if err := s.updateNodeDrainImpl(txn, index, node, update.DrainStrategy, update.MarkEligible, updatedAt,
			events[node], nil, "", true); err != nil {
			return err
		}
	}
	return txn.Commit()
}

// UpdateNodeDrain is used to update the drain of a node
func (s *StateStore) UpdateNodeDrain(msgType structs.MessageType, index uint64, nodeID string,
	drain *structs.DrainStrategy, markEligible bool, updatedAt int64,
	event *structs.NodeEvent, drainMeta map[string]string, accessorId string) error {

	txn := s.db.WriteTxnMsgT(msgType, index)
	defer txn.Abort()
	if err := s.updateNodeDrainImpl(txn, index, nodeID, drain, markEligible, updatedAt, event,
		drainMeta, accessorId, false); err != nil {

		return err
	}
	return txn.Commit()
}

func (s *StateStore) updateNodeDrainImpl(txn *txn, index uint64, nodeID string,
	drain *structs.DrainStrategy, markEligible bool, updatedAt int64,
	event *structs.NodeEvent, drainMeta map[string]string, accessorId string,
	drainCompleted bool) error {

	// Lookup the node
	existing, err := txn.First("nodes", "id", nodeID)
	if err != nil {
		return fmt.Errorf("node lookup failed: %v", err)
	}
	if existing == nil {
		return fmt.Errorf("node not found")
	}

	// Copy the existing node
	existingNode := existing.(*structs.Node)
	updatedNode := existingNode.Copy()
	updatedNode.StatusUpdatedAt = updatedAt

	// Add the event if given
	if event != nil {
		appendNodeEvents(index, updatedNode, []*structs.NodeEvent{event})
	}

	// Update the drain in the copy
	updatedNode.DrainStrategy = drain
	if drain != nil {
		updatedNode.SchedulingEligibility = structs.NodeSchedulingIneligible
	} else if markEligible {
		updatedNode.SchedulingEligibility = structs.NodeSchedulingEligible
	}

	// Update LastDrain
	updateTime := time.Unix(updatedAt, 0)

	// if drain strategy isn't set before or after, this wasn't a drain operation
	// in that case, we don't care about .LastDrain
	drainNoop := existingNode.DrainStrategy == nil && updatedNode.DrainStrategy == nil
	// otherwise, when done with this method, updatedNode.LastDrain should be set
	// if starting a new drain operation, create a new LastDrain. otherwise, update the existing one.
	startedDraining := existingNode.DrainStrategy == nil && updatedNode.DrainStrategy != nil
	if !drainNoop {
		if startedDraining {
			updatedNode.LastDrain = &structs.DrainMetadata{
				StartedAt: updateTime,
				Meta:      drainMeta,
			}
		} else if updatedNode.LastDrain == nil {
			// if already draining and LastDrain doesn't exist, we need to create a new one
			// this could happen if we upgraded to 1.1.x during a drain
			updatedNode.LastDrain = &structs.DrainMetadata{
				// we don't have sub-second accuracy on these fields, so truncate this
				StartedAt: time.Unix(existingNode.DrainStrategy.StartedAt.Unix(), 0),
				Meta:      drainMeta,
			}
		}

		updatedNode.LastDrain.UpdatedAt = updateTime

		// won't have new metadata on drain complete; keep the existing operator-provided metadata
		// also, keep existing if they didn't provide it
		if len(drainMeta) != 0 {
			updatedNode.LastDrain.Meta = drainMeta
		}

		// we won't have an accessor ID on drain complete, so don't overwrite the existing one
		if accessorId != "" {
			updatedNode.LastDrain.AccessorID = accessorId
		}

		if updatedNode.DrainStrategy != nil {
			updatedNode.LastDrain.Status = structs.DrainStatusDraining
		} else if drainCompleted {
			updatedNode.LastDrain.Status = structs.DrainStatusComplete
		} else {
			updatedNode.LastDrain.Status = structs.DrainStatusCanceled
		}
	}

	updatedNode.ModifyIndex = index

	// Insert the node
	if err := txn.Insert("nodes", updatedNode); err != nil {
		return fmt.Errorf("node update failed: %v", err)
	}
	if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}

	return nil
}

// UpdateNodeEligibility is used to update the scheduling eligibility of a node
func (s *StateStore) UpdateNodeEligibility(msgType structs.MessageType, index uint64, nodeID string, eligibility string, updatedAt int64, event *structs.NodeEvent) error {
	txn := s.db.WriteTxnMsgT(msgType, index)
	defer txn.Abort()
	if err := s.updateNodeEligibilityImpl(index, nodeID, eligibility, updatedAt, event, txn); err != nil {
		return err
	}
	return txn.Commit()
}
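
// exampleMarkNodeIneligible is an illustrative sketch added in editing; it is
// not part of the original file. It marks a node ineligible for scheduling and
// records a cluster-subsystem event; structs.NodeUpdateEligibilityRequestType
// is assumed to be the matching message type, and the message text is made up.
func exampleMarkNodeIneligible(s *StateStore, index uint64, nodeID string) error {
	now := time.Now()
	event := structs.NewNodeEvent().
		SetSubsystem(structs.NodeEventSubsystemCluster).
		SetMessage("Node marked ineligible by operator").
		SetTimestamp(now)
	return s.UpdateNodeEligibility(structs.NodeUpdateEligibilityRequestType, index, nodeID,
		structs.NodeSchedulingIneligible, now.Unix(), event)
}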

func (s *StateStore) updateNodeEligibilityImpl(index uint64, nodeID string, eligibility string, updatedAt int64, event *structs.NodeEvent, txn *txn) error {
	// Lookup the node
	existing, err := txn.First("nodes", "id", nodeID)
	if err != nil {
		return fmt.Errorf("node lookup failed: %v", err)
	}
	if existing == nil {
		return fmt.Errorf("node not found")
	}

	// Copy the existing node
	existingNode := existing.(*structs.Node)
	copyNode := existingNode.Copy()
	copyNode.StatusUpdatedAt = updatedAt

	// Add the event if given
	if event != nil {
		appendNodeEvents(index, copyNode, []*structs.NodeEvent{event})
	}

	// Check if this is a valid action
	if copyNode.DrainStrategy != nil && eligibility == structs.NodeSchedulingEligible {
		return fmt.Errorf("can not set node's scheduling eligibility to eligible while it is draining")
	}

	// Update the eligibility in the copy
	copyNode.SchedulingEligibility = eligibility
	copyNode.ModifyIndex = index

	// Insert the node
	if err := txn.Insert("nodes", copyNode); err != nil {
		return fmt.Errorf("node update failed: %v", err)
	}
	if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}

	return nil
}

// UpsertNodeEvents adds the node events to the nodes, rotating events as
// necessary.
func (s *StateStore) UpsertNodeEvents(msgType structs.MessageType, index uint64, nodeEvents map[string][]*structs.NodeEvent) error {
	txn := s.db.WriteTxnMsgT(msgType, index)
	defer txn.Abort()

	for nodeID, events := range nodeEvents {
		if err := s.upsertNodeEvents(index, nodeID, events, txn); err != nil {
			return err
		}
	}

	return txn.Commit()
}

// upsertNodeEvents upserts node events for a given node. It also maintains
// that a fixed number of node events are ever stored simultaneously, deleting
// older events once this bound has been reached.
func (s *StateStore) upsertNodeEvents(index uint64, nodeID string, events []*structs.NodeEvent, txn *txn) error {
	// Lookup the node
	existing, err := txn.First("nodes", "id", nodeID)
	if err != nil {
		return fmt.Errorf("node lookup failed: %v", err)
	}
	if existing == nil {
		return fmt.Errorf("node not found")
	}

	// Copy the existing node
	existingNode := existing.(*structs.Node)
	copyNode := existingNode.Copy()
	appendNodeEvents(index, copyNode, events)

	// Insert the node
	if err := txn.Insert("nodes", copyNode); err != nil {
		return fmt.Errorf("node update failed: %v", err)
	}
	if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}

	return nil
}

// appendNodeEvents is a helper that takes a node and new events and appends
// them, pruning older events as needed.
func appendNodeEvents(index uint64, node *structs.Node, events []*structs.NodeEvent) {
	// Add the events, updating the indexes
	for _, e := range events {
		e.CreateIndex = index
		node.Events = append(node.Events, e)
	}

	// Keep node events pruned to not exceed the max allowed
	if l := len(node.Events); l > structs.MaxRetainedNodeEvents {
		delta := l - structs.MaxRetainedNodeEvents
		node.Events = node.Events[delta:]
	}
}
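
// exampleRecordNodeEvent is an illustrative sketch added in editing; it is not
// part of the original file. It attaches a single informational event to one
// node; structs.UpsertNodeEventsType is assumed to be the matching message
// type, and older events are rotated out automatically by appendNodeEvents.
func exampleRecordNodeEvent(s *StateStore, index uint64, nodeID string) error {
	event := structs.NewNodeEvent().
		SetSubsystem(structs.NodeEventSubsystemCluster).
		SetMessage("Example maintenance event").
		SetTimestamp(time.Now())
	return s.UpsertNodeEvents(structs.UpsertNodeEventsType, index,
		map[string][]*structs.NodeEvent{nodeID: {event}})
}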

// upsertCSIPluginsForNode indexes csi plugins for volume retrieval, with health. It's called
// on upsertNodeEvents, so that event driven health changes are updated
func upsertCSIPluginsForNode(txn *txn, node *structs.Node, index uint64) error {

	upsertFn := func(info *structs.CSIInfo) error {
		raw, err := txn.First("csi_plugins", "id", info.PluginID)
		if err != nil {
			return fmt.Errorf("csi_plugin lookup error: %s %v", info.PluginID, err)
		}

		var plug *structs.CSIPlugin
		if raw != nil {
			plug = raw.(*structs.CSIPlugin).Copy()
		} else {
			if !info.Healthy {
				// we don't want to create new plugins for unhealthy
				// allocs, otherwise we'd recreate the plugin when we
				// get the update for the alloc becoming terminal
				return nil
			}
			plug = structs.NewCSIPlugin(info.PluginID, index)
		}

		// the plugin may have been created by the job being updated, in which case
		// this data will not be configured, it's only available to the fingerprint
		// system
		plug.Provider = info.Provider
		plug.Version = info.ProviderVersion

		err = plug.AddPlugin(node.ID, info)
		if err != nil {
			return err
		}

		plug.ModifyIndex = index

		err = txn.Insert("csi_plugins", plug)
		if err != nil {
			return fmt.Errorf("csi_plugins insert error: %v", err)
		}

		return nil
	}

	inUseController := map[string]struct{}{}
	inUseNode := map[string]struct{}{}

	for _, info := range node.CSIControllerPlugins {
		err := upsertFn(info)
		if err != nil {
			return err
		}
		inUseController[info.PluginID] = struct{}{}
	}

	for _, info := range node.CSINodePlugins {
		err := upsertFn(info)
		if err != nil {
			return err
		}
		inUseNode[info.PluginID] = struct{}{}
	}

	// remove the client node from any plugin that's not
	// running on it.
	iter, err := txn.Get("csi_plugins", "id")
	if err != nil {
		return fmt.Errorf("csi_plugins lookup failed: %v", err)
	}
	for {
		raw := iter.Next()
		if raw == nil {
			break
		}
		plug, ok := raw.(*structs.CSIPlugin)
		if !ok {
			continue
		}
		plug = plug.Copy()

		var hadDelete bool
		if _, ok := inUseController[plug.ID]; !ok {
			if _, asController := plug.Controllers[node.ID]; asController {
				err := plug.DeleteNodeForType(node.ID, structs.CSIPluginTypeController)
				if err != nil {
					return err
				}
				hadDelete = true
			}
		}
		if _, ok := inUseNode[plug.ID]; !ok {
			if _, asNode := plug.Nodes[node.ID]; asNode {
				err := plug.DeleteNodeForType(node.ID, structs.CSIPluginTypeNode)
				if err != nil {
					return err
				}
				hadDelete = true
			}
		}
		// we check this flag both for performance and to make sure we
		// don't delete a plugin when registering a node plugin but
		// no controller
		if hadDelete {
			err = updateOrGCPlugin(index, txn, plug)
			if err != nil {
				return err
			}
		}
	}

	if err := txn.Insert("index", &IndexEntry{"csi_plugins", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}

	return nil
}

// deleteNodeCSIPlugins cleans up CSIInfo node health status, called in DeleteNode
func deleteNodeCSIPlugins(txn *txn, node *structs.Node, index uint64) error {
	if len(node.CSIControllerPlugins) == 0 && len(node.CSINodePlugins) == 0 {
		return nil
	}

	names := map[string]struct{}{}
	for _, info := range node.CSIControllerPlugins {
		names[info.PluginID] = struct{}{}
	}
	for _, info := range node.CSINodePlugins {
		names[info.PluginID] = struct{}{}
	}

	for id := range names {
		raw, err := txn.First("csi_plugins", "id", id)
		if err != nil {
			return fmt.Errorf("csi_plugins lookup error %s: %v", id, err)
		}
		if raw == nil {
			// plugin may have been deregistered but we didn't
			// update the fingerprint yet
			continue
		}

		plug := raw.(*structs.CSIPlugin).Copy()
		err = plug.DeleteNode(node.ID)
		if err != nil {
			return err
		}
		err = updateOrGCPlugin(index, txn, plug)
		if err != nil {
			return err
		}
	}

	if err := txn.Insert("index", &IndexEntry{"csi_plugins", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}

	return nil
}

// updateOrGCPlugin updates a plugin but will delete it if the plugin is empty
func updateOrGCPlugin(index uint64, txn Txn, plug *structs.CSIPlugin) error {
	plug.ModifyIndex = index

	if plug.IsEmpty() {
		err := txn.Delete("csi_plugins", plug)
		if err != nil {
			return fmt.Errorf("csi_plugins delete error: %v", err)
		}
	} else {
		err := txn.Insert("csi_plugins", plug)
		if err != nil {
			return fmt.Errorf("csi_plugins update error %s: %v", plug.ID, err)
		}
	}
	return nil
}

// deleteJobFromPlugins removes the allocations of this job from any plugins the job is
// running, possibly deleting the plugin if it's no longer in use. It's called in DeleteJobTxn
func (s *StateStore) deleteJobFromPlugins(index uint64, txn Txn, job *structs.Job) error {
	ws := memdb.NewWatchSet()
	summary, err := s.JobSummaryByID(ws, job.Namespace, job.ID)
	if err != nil {
		return fmt.Errorf("error getting job summary: %v", err)
	}

	allocs, err := s.AllocsByJob(ws, job.Namespace, job.ID, false)
	if err != nil {
		return fmt.Errorf("error getting allocations: %v", err)
	}

	type pair struct {
		pluginID string
		alloc    *structs.Allocation
	}

	plugAllocs := []*pair{}
	found := map[string]struct{}{}

	// Find plugins for allocs that belong to this job
	for _, a := range allocs {
		tg := a.Job.LookupTaskGroup(a.TaskGroup)
		found[tg.Name] = struct{}{}
		for _, t := range tg.Tasks {
			if t.CSIPluginConfig == nil {
				continue
			}
			plugAllocs = append(plugAllocs, &pair{
				pluginID: t.CSIPluginConfig.ID,
				alloc:    a,
			})
		}
	}

	// Find any plugins that do not yet have allocs for this job
	for _, tg := range job.TaskGroups {
		if _, ok := found[tg.Name]; ok {
			continue
		}

		for _, t := range tg.Tasks {
			if t.CSIPluginConfig == nil {
				continue
			}
			plugAllocs = append(plugAllocs, &pair{
				pluginID: t.CSIPluginConfig.ID,
			})
		}
	}

	plugins := map[string]*structs.CSIPlugin{}

	for _, x := range plugAllocs {
		plug, ok := plugins[x.pluginID]

		if !ok {
			plug, err = s.CSIPluginByIDTxn(txn, nil, x.pluginID)
			if err != nil {
				return fmt.Errorf("error getting plugin: %s, %v", x.pluginID, err)
			}
			if plug == nil {
				// plugin was never successfully registered or has been
				// GC'd out from under us
				continue
			}
			// only copy once, so we update the same plugin on each alloc
			plugins[x.pluginID] = plug.Copy()
			plug = plugins[x.pluginID]
		}

		if x.alloc == nil {
			continue
		}
		err := plug.DeleteAlloc(x.alloc.ID, x.alloc.NodeID)
		if err != nil {
			return err
		}
	}

	for _, plug := range plugins {
		plug.DeleteJob(job, summary)
		err = updateOrGCPlugin(index, txn, plug)
		if err != nil {
			return err
		}
	}

	if len(plugins) > 0 {
		if err = txn.Insert("index", &IndexEntry{"csi_plugins", index}); err != nil {
			return fmt.Errorf("index update failed: %v", err)
		}
	}

	return nil
}

// NodeByID is used to lookup a node by ID
func (s *StateStore) NodeByID(ws memdb.WatchSet, nodeID string) (*structs.Node, error) {
	txn := s.db.ReadTxn()

	watchCh, existing, err := txn.FirstWatch("nodes", "id", nodeID)
	if err != nil {
		return nil, fmt.Errorf("node lookup failed: %v", err)
	}
	ws.Add(watchCh)

	if existing != nil {
		return existing.(*structs.Node), nil
	}
	return nil, nil
}

// NodesByIDPrefix is used to lookup nodes by prefix
func (s *StateStore) NodesByIDPrefix(ws memdb.WatchSet, nodeID string) (memdb.ResultIterator, error) {
	txn := s.db.ReadTxn()

	iter, err := txn.Get("nodes", "id_prefix", nodeID)
	if err != nil {
		return nil, fmt.Errorf("node lookup failed: %v", err)
	}
	ws.Add(iter.WatchCh())

	return iter, nil
}
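
// exampleNodesWithPrefix is an illustrative sketch added in editing; it is not
// part of the original file. It collects every node whose ID starts with the
// given prefix by draining the prefix iterator.
func exampleNodesWithPrefix(s *StateStore, prefix string) ([]*structs.Node, error) {
	ws := memdb.NewWatchSet()
	iter, err := s.NodesByIDPrefix(ws, prefix)
	if err != nil {
		return nil, err
	}
	var out []*structs.Node
	for raw := iter.Next(); raw != nil; raw = iter.Next() {
		out = append(out, raw.(*structs.Node))
	}
	return out, nil
}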

// NodeBySecretID is used to lookup a node by SecretID
func (s *StateStore) NodeBySecretID(ws memdb.WatchSet, secretID string) (*structs.Node, error) {
	txn := s.db.ReadTxn()

	watchCh, existing, err := txn.FirstWatch("nodes", "secret_id", secretID)
	if err != nil {
		return nil, fmt.Errorf("node lookup by SecretID failed: %v", err)
	}
	ws.Add(watchCh)

	if existing != nil {
		return existing.(*structs.Node), nil
	}
	return nil, nil
}

// Nodes returns an iterator over all the nodes
func (s *StateStore) Nodes(ws memdb.WatchSet) (memdb.ResultIterator, error) {
	txn := s.db.ReadTxn()

	// Walk the entire nodes table
	iter, err := txn.Get("nodes", "id")
	if err != nil {
		return nil, err
	}
	ws.Add(iter.WatchCh())
	return iter, nil
}

// UpsertJob is used to register a job or update a job definition
func (s *StateStore) UpsertJob(msgType structs.MessageType, index uint64, job *structs.Job) error {
	txn := s.db.WriteTxnMsgT(msgType, index)
	defer txn.Abort()
	if err := s.upsertJobImpl(index, job, false, txn); err != nil {
		return err
	}
	return txn.Commit()
}

// UpsertJobTxn is used to register a job or update a job definition, like UpsertJob,
// but in a transaction. Useful for when making multiple modifications atomically
func (s *StateStore) UpsertJobTxn(index uint64, job *structs.Job, txn Txn) error {
	return s.upsertJobImpl(index, job, false, txn)
}

// upsertJobImpl is the implementation for registering a job or updating a job definition
func (s *StateStore) upsertJobImpl(index uint64, job *structs.Job, keepVersion bool, txn *txn) error {
	// Assert the namespace exists
	if exists, err := s.namespaceExists(txn, job.Namespace); err != nil {
		return err
	} else if !exists {
		return fmt.Errorf("job %q is in nonexistent namespace %q", job.ID, job.Namespace)
	}

	// Check if the job already exists
	existing, err := txn.First("jobs", "id", job.Namespace, job.ID)
	var existingJob *structs.Job
	if err != nil {
		return fmt.Errorf("job lookup failed: %v", err)
	}

	// Setup the indexes correctly
	if existing != nil {
		job.CreateIndex = existing.(*structs.Job).CreateIndex
		job.ModifyIndex = index

		existingJob = existing.(*structs.Job)

		// Bump the version unless asked to keep it. This should only be done
		// when changing an internal field such as Stable. A spec change should
		// always come with a version bump
		if !keepVersion {
			job.JobModifyIndex = index
			if job.Version <= existingJob.Version {
				job.Version = existingJob.Version + 1
			}
		}

		// Compute the job status
		var err error
		job.Status, err = s.getJobStatus(txn, job, false)
		if err != nil {
			return fmt.Errorf("setting job status for %q failed: %v", job.ID, err)
		}
	} else {
		job.CreateIndex = index
		job.ModifyIndex = index
		job.JobModifyIndex = index

		if err := s.setJobStatus(index, txn, job, false, ""); err != nil {
			return fmt.Errorf("setting job status for %q failed: %v", job.ID, err)
		}

		// Have to get the job again since it could have been updated
		updated, err := txn.First("jobs", "id", job.Namespace, job.ID)
		if err != nil {
			return fmt.Errorf("job lookup failed: %v", err)
		}
		if updated != nil {
			job = updated.(*structs.Job)
		}
	}

	if err := s.updateSummaryWithJob(index, job, txn); err != nil {
		return fmt.Errorf("unable to create job summary: %v", err)
	}

	if err := s.upsertJobVersion(index, job, txn); err != nil {
		return fmt.Errorf("unable to upsert job into job_version table: %v", err)
	}

	if err := s.updateJobScalingPolicies(index, job, txn); err != nil {
		return fmt.Errorf("unable to update job scaling policies: %v", err)
	}

	if err := s.updateJobRecommendations(index, txn, existingJob, job); err != nil {
		return fmt.Errorf("unable to update job recommendations: %v", err)
	}

	if err := s.updateJobCSIPlugins(index, job, existingJob, txn); err != nil {
		return fmt.Errorf("unable to update job csi plugins: %v", err)
	}

	// Insert the job
	if err := txn.Insert("jobs", job); err != nil {
		return fmt.Errorf("job insert failed: %v", err)
	}
	if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}

	return nil
}

// DeleteJob is used to deregister a job
func (s *StateStore) DeleteJob(index uint64, namespace, jobID string) error {
	txn := s.db.WriteTxn(index)
	defer txn.Abort()

	err := s.DeleteJobTxn(index, namespace, jobID, txn)
	if err == nil {
		return txn.Commit()
	}
	return err
}

// DeleteJobTxn is used to deregister a job, like DeleteJob,
// but in a transaction. Useful for when making multiple modifications atomically
func (s *StateStore) DeleteJobTxn(index uint64, namespace, jobID string, txn Txn) error {
	// Lookup the job
	existing, err := txn.First("jobs", "id", namespace, jobID)
	if err != nil {
		return fmt.Errorf("job lookup failed: %v", err)
	}
	if existing == nil {
		return fmt.Errorf("job not found")
	}

	// Check if we should update a parent job summary
	job := existing.(*structs.Job)
	if job.ParentID != "" {
		summaryRaw, err := txn.First("job_summary", "id", namespace, job.ParentID)
		if err != nil {
			return fmt.Errorf("unable to retrieve summary for parent job: %v", err)
		}

		// Only continue if the summary exists. It could not exist if the parent
It could not exist if the parent 1747 // job was removed 1748 if summaryRaw != nil { 1749 existing := summaryRaw.(*structs.JobSummary) 1750 pSummary := existing.Copy() 1751 if pSummary.Children != nil { 1752 1753 modified := false 1754 switch job.Status { 1755 case structs.JobStatusPending: 1756 pSummary.Children.Pending-- 1757 pSummary.Children.Dead++ 1758 modified = true 1759 case structs.JobStatusRunning: 1760 pSummary.Children.Running-- 1761 pSummary.Children.Dead++ 1762 modified = true 1763 case structs.JobStatusDead: 1764 default: 1765 return fmt.Errorf("unknown old job status %q", job.Status) 1766 } 1767 1768 if modified { 1769 // Update the modify index 1770 pSummary.ModifyIndex = index 1771 1772 // Insert the summary 1773 if err := txn.Insert("job_summary", pSummary); err != nil { 1774 return fmt.Errorf("job summary insert failed: %v", err) 1775 } 1776 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 1777 return fmt.Errorf("index update failed: %v", err) 1778 } 1779 } 1780 } 1781 } 1782 } 1783 1784 // Delete the job 1785 if err := txn.Delete("jobs", existing); err != nil { 1786 return fmt.Errorf("job delete failed: %v", err) 1787 } 1788 if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil { 1789 return fmt.Errorf("index update failed: %v", err) 1790 } 1791 1792 // Delete the job versions 1793 if err := s.deleteJobVersions(index, job, txn); err != nil { 1794 return err 1795 } 1796 1797 // Cleanup plugins registered by this job, before we delete the summary 1798 err = s.deleteJobFromPlugins(index, txn, job) 1799 if err != nil { 1800 return fmt.Errorf("deleting job from plugin: %v", err) 1801 } 1802 1803 // Delete the job summary 1804 if _, err = txn.DeleteAll("job_summary", "id", namespace, jobID); err != nil { 1805 return fmt.Errorf("deleting job summary failed: %v", err) 1806 } 1807 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 1808 return fmt.Errorf("index update failed: %v", err) 1809 } 1810 1811 // Delete any remaining job scaling policies 1812 if err := s.deleteJobScalingPolicies(index, job, txn); err != nil { 1813 return fmt.Errorf("deleting job scaling policies failed: %v", err) 1814 } 1815 1816 // Delete any job recommendations 1817 if err := s.deleteRecommendationsByJob(index, txn, job); err != nil { 1818 return fmt.Errorf("deleting job recommendatons failed: %v", err) 1819 } 1820 1821 // Delete the scaling events 1822 if _, err = txn.DeleteAll("scaling_event", "id", namespace, jobID); err != nil { 1823 return fmt.Errorf("deleting job scaling events failed: %v", err) 1824 } 1825 if err := txn.Insert("index", &IndexEntry{"scaling_event", index}); err != nil { 1826 return fmt.Errorf("index update failed: %v", err) 1827 } 1828 1829 return nil 1830 } 1831 1832 // deleteJobScalingPolicies deletes any scaling policies associated with the job 1833 func (s *StateStore) deleteJobScalingPolicies(index uint64, job *structs.Job, txn *txn) error { 1834 iter, err := s.ScalingPoliciesByJobTxn(nil, job.Namespace, job.ID, txn) 1835 if err != nil { 1836 return fmt.Errorf("getting job scaling policies for deletion failed: %v", err) 1837 } 1838 1839 // Put them into a slice so there are no safety concerns while actually 1840 // performing the deletes 1841 policies := []interface{}{} 1842 for { 1843 raw := iter.Next() 1844 if raw == nil { 1845 break 1846 } 1847 policies = append(policies, raw) 1848 } 1849 1850 // Do the deletes 1851 for _, p := range policies { 1852 if err := txn.Delete("scaling_policy", p); err != 
nil { 1853 return fmt.Errorf("deleting scaling policy failed: %v", err) 1854 } 1855 } 1856 1857 if len(policies) > 0 { 1858 if err := txn.Insert("index", &IndexEntry{"scaling_policy", index}); err != nil { 1859 return fmt.Errorf("index update failed: %v", err) 1860 } 1861 } 1862 return nil 1863 } 1864 1865 // deleteJobVersions deletes all versions of the given job. 1866 func (s *StateStore) deleteJobVersions(index uint64, job *structs.Job, txn *txn) error { 1867 iter, err := txn.Get("job_version", "id_prefix", job.Namespace, job.ID) 1868 if err != nil { 1869 return err 1870 } 1871 1872 // Put them into a slice so there are no safety concerns while actually 1873 // performing the deletes 1874 jobs := []*structs.Job{} 1875 for { 1876 raw := iter.Next() 1877 if raw == nil { 1878 break 1879 } 1880 1881 // Ensure the ID is an exact match 1882 j := raw.(*structs.Job) 1883 if j.ID != job.ID { 1884 continue 1885 } 1886 1887 jobs = append(jobs, j) 1888 } 1889 1890 // Do the deletes 1891 for _, j := range jobs { 1892 if err := txn.Delete("job_version", j); err != nil { 1893 return fmt.Errorf("deleting job versions failed: %v", err) 1894 } 1895 } 1896 1897 if err := txn.Insert("index", &IndexEntry{"job_version", index}); err != nil { 1898 return fmt.Errorf("index update failed: %v", err) 1899 } 1900 1901 return nil 1902 } 1903 1904 // upsertJobVersion inserts a job into its historic version table and limits the 1905 // number of job versions that are tracked. 1906 func (s *StateStore) upsertJobVersion(index uint64, job *structs.Job, txn *txn) error { 1907 // Insert the job 1908 if err := txn.Insert("job_version", job); err != nil { 1909 return fmt.Errorf("failed to insert job into job_version table: %v", err) 1910 } 1911 1912 if err := txn.Insert("index", &IndexEntry{"job_version", index}); err != nil { 1913 return fmt.Errorf("index update failed: %v", err) 1914 } 1915 1916 // Get all the historic jobs for this ID 1917 all, err := s.jobVersionByID(txn, nil, job.Namespace, job.ID) 1918 if err != nil { 1919 return fmt.Errorf("failed to look up job versions for %q: %v", job.ID, err) 1920 } 1921 1922 // If we are below the limit there is no GCing to be done 1923 if len(all) <= structs.JobTrackedVersions { 1924 return nil 1925 } 1926 1927 // We have to delete a historic job to make room. 1928 // Find index of the highest versioned stable job 1929 stableIdx := -1 1930 for i, j := range all { 1931 if j.Stable { 1932 stableIdx = i 1933 break 1934 } 1935 } 1936 1937 // If the stable job is the oldest version, do a swap to bring it into the 1938 // keep set. 1939 max := structs.JobTrackedVersions 1940 if stableIdx == max { 1941 all[max-1], all[max] = all[max], all[max-1] 1942 } 1943 1944 // Delete the job outside of the set that are being kept. 1945 d := all[max] 1946 if err := txn.Delete("job_version", d); err != nil { 1947 return fmt.Errorf("failed to delete job %v (%d) from job_version", d.ID, d.Version) 1948 } 1949 1950 return nil 1951 } 1952 1953 // JobByID is used to lookup a job by its ID. JobByID returns the current/latest job 1954 // version. 1955 func (s *StateStore) JobByID(ws memdb.WatchSet, namespace, id string) (*structs.Job, error) { 1956 txn := s.db.ReadTxn() 1957 return s.JobByIDTxn(ws, namespace, id, txn) 1958 } 1959 1960 // JobByIDTxn is used to lookup a job by its ID, like JobByID. 
JobByIDTxn returns the job version 1961 // accessible in the given transaction 1962 func (s *StateStore) JobByIDTxn(ws memdb.WatchSet, namespace, id string, txn Txn) (*structs.Job, error) { 1963 watchCh, existing, err := txn.FirstWatch("jobs", "id", namespace, id) 1964 if err != nil { 1965 return nil, fmt.Errorf("job lookup failed: %v", err) 1966 } 1967 ws.Add(watchCh) 1968 1969 if existing != nil { 1970 return existing.(*structs.Job), nil 1971 } 1972 return nil, nil 1973 } 1974 1975 // JobsByIDPrefix is used to lookup a job by prefix. If querying all namespaces 1976 // the prefix will not be filtered by an index. 1977 func (s *StateStore) JobsByIDPrefix(ws memdb.WatchSet, namespace, id string) (memdb.ResultIterator, error) { 1978 if namespace == structs.AllNamespacesSentinel { 1979 return s.jobsByIDPrefixAllNamespaces(ws, id) 1980 } 1981 1982 txn := s.db.ReadTxn() 1983 1984 iter, err := txn.Get("jobs", "id_prefix", namespace, id) 1985 if err != nil { 1986 return nil, fmt.Errorf("job lookup failed: %v", err) 1987 } 1988 1989 ws.Add(iter.WatchCh()) 1990 1991 return iter, nil 1992 } 1993 1994 func (s *StateStore) jobsByIDPrefixAllNamespaces(ws memdb.WatchSet, prefix string) (memdb.ResultIterator, error) { 1995 txn := s.db.ReadTxn() 1996 1997 // Walk the entire jobs table 1998 iter, err := txn.Get("jobs", "id") 1999 2000 if err != nil { 2001 return nil, err 2002 } 2003 2004 ws.Add(iter.WatchCh()) 2005 2006 // Filter the iterator by ID prefix 2007 f := func(raw interface{}) bool { 2008 job, ok := raw.(*structs.Job) 2009 if !ok { 2010 return true 2011 } 2012 return !strings.HasPrefix(job.ID, prefix) 2013 } 2014 wrap := memdb.NewFilterIterator(iter, f) 2015 return wrap, nil 2016 } 2017 2018 // JobVersionsByID returns all the tracked versions of a job. 2019 func (s *StateStore) JobVersionsByID(ws memdb.WatchSet, namespace, id string) ([]*structs.Job, error) { 2020 txn := s.db.ReadTxn() 2021 2022 return s.jobVersionByID(txn, ws, namespace, id) 2023 } 2024 2025 // jobVersionByID is the underlying implementation for retrieving all tracked 2026 // versions of a job and is called under an existing transaction. A watch set 2027 // can optionally be passed in to add the job histories to the watch set. 2028 func (s *StateStore) jobVersionByID(txn *txn, ws memdb.WatchSet, namespace, id string) ([]*structs.Job, error) { 2029 // Get all the historic jobs for this ID 2030 iter, err := txn.Get("job_version", "id_prefix", namespace, id) 2031 if err != nil { 2032 return nil, err 2033 } 2034 2035 ws.Add(iter.WatchCh()) 2036 2037 var all []*structs.Job 2038 for { 2039 raw := iter.Next() 2040 if raw == nil { 2041 break 2042 } 2043 2044 // Ensure the ID is an exact match 2045 j := raw.(*structs.Job) 2046 if j.ID != id { 2047 continue 2048 } 2049 2050 all = append(all, j) 2051 } 2052 2053 // Sort in reverse order so that the highest version is first 2054 sort.Slice(all, func(i, j int) bool { 2055 return all[i].Version > all[j].Version 2056 }) 2057 2058 return all, nil 2059 } 2060 2061 // JobByIDAndVersion returns the job identified by its ID and Version. The 2062 // passed watchset may be nil. 2063 func (s *StateStore) JobByIDAndVersion(ws memdb.WatchSet, namespace, id string, version uint64) (*structs.Job, error) { 2064 txn := s.db.ReadTxn() 2065 return s.jobByIDAndVersionImpl(ws, namespace, id, version, txn) 2066 } 2067 2068 // jobByIDAndVersionImpl returns the job identified by its ID and Version. The 2069 // passed watchset may be nil.
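// Example (minimal usage sketch, not part of the package API; assumes a
// *StateStore named store and a hypothetical job "example-job" in the default
// namespace): listing the tracked versions of a job and fetching one specific
// version through the helpers above.
func exampleJobVersionLookup(store *StateStore) error {
	ws := memdb.NewWatchSet()

	// All tracked versions of the job, sorted with the highest version first.
	versions, err := store.JobVersionsByID(ws, structs.DefaultNamespace, "example-job")
	if err != nil {
		return err
	}

	// A single tracked version; returns nil, nil if that version is not kept.
	if len(versions) > 0 {
		_, err = store.JobByIDAndVersion(ws, structs.DefaultNamespace, "example-job", versions[0].Version)
	}
	return err
}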
2070 func (s *StateStore) jobByIDAndVersionImpl(ws memdb.WatchSet, namespace, id string, 2071 version uint64, txn *txn) (*structs.Job, error) { 2072 2073 watchCh, existing, err := txn.FirstWatch("job_version", "id", namespace, id, version) 2074 if err != nil { 2075 return nil, err 2076 } 2077 2078 ws.Add(watchCh) 2079 2080 if existing != nil { 2081 job := existing.(*structs.Job) 2082 return job, nil 2083 } 2084 2085 return nil, nil 2086 } 2087 2088 func (s *StateStore) JobVersions(ws memdb.WatchSet) (memdb.ResultIterator, error) { 2089 txn := s.db.ReadTxn() 2090 2091 // Walk the entire deployments table 2092 iter, err := txn.Get("job_version", "id") 2093 if err != nil { 2094 return nil, err 2095 } 2096 2097 ws.Add(iter.WatchCh()) 2098 return iter, nil 2099 } 2100 2101 // Jobs returns an iterator over all the jobs 2102 func (s *StateStore) Jobs(ws memdb.WatchSet) (memdb.ResultIterator, error) { 2103 txn := s.db.ReadTxn() 2104 2105 // Walk the entire jobs table 2106 iter, err := txn.Get("jobs", "id") 2107 if err != nil { 2108 return nil, err 2109 } 2110 2111 ws.Add(iter.WatchCh()) 2112 2113 return iter, nil 2114 } 2115 2116 // JobsByNamespace returns an iterator over all the jobs for the given namespace 2117 func (s *StateStore) JobsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) { 2118 txn := s.db.ReadTxn() 2119 return s.jobsByNamespaceImpl(ws, namespace, txn) 2120 } 2121 2122 // jobsByNamespaceImpl returns an iterator over all the jobs for the given namespace 2123 func (s *StateStore) jobsByNamespaceImpl(ws memdb.WatchSet, namespace string, txn *txn) (memdb.ResultIterator, error) { 2124 // Walk the entire jobs table 2125 iter, err := txn.Get("jobs", "id_prefix", namespace, "") 2126 if err != nil { 2127 return nil, err 2128 } 2129 2130 ws.Add(iter.WatchCh()) 2131 2132 return iter, nil 2133 } 2134 2135 // JobsByPeriodic returns an iterator over all the periodic or non-periodic jobs. 2136 func (s *StateStore) JobsByPeriodic(ws memdb.WatchSet, periodic bool) (memdb.ResultIterator, error) { 2137 txn := s.db.ReadTxn() 2138 2139 iter, err := txn.Get("jobs", "periodic", periodic) 2140 if err != nil { 2141 return nil, err 2142 } 2143 2144 ws.Add(iter.WatchCh()) 2145 2146 return iter, nil 2147 } 2148 2149 // JobsByScheduler returns an iterator over all the jobs with the specific 2150 // scheduler type. 2151 func (s *StateStore) JobsByScheduler(ws memdb.WatchSet, schedulerType string) (memdb.ResultIterator, error) { 2152 txn := s.db.ReadTxn() 2153 2154 // Return an iterator for jobs with the specific type. 2155 iter, err := txn.Get("jobs", "type", schedulerType) 2156 if err != nil { 2157 return nil, err 2158 } 2159 2160 ws.Add(iter.WatchCh()) 2161 2162 return iter, nil 2163 } 2164 2165 // JobsByGC returns an iterator over all jobs eligible or ineligible for garbage 2166 // collection. 2167 func (s *StateStore) JobsByGC(ws memdb.WatchSet, gc bool) (memdb.ResultIterator, error) { 2168 txn := s.db.ReadTxn() 2169 2170 iter, err := txn.Get("jobs", "gc", gc) 2171 if err != nil { 2172 return nil, err 2173 } 2174 2175 ws.Add(iter.WatchCh()) 2176 2177 return iter, nil 2178 } 2179 2180 // JobSummaryByID returns a job summary object which matches a specific id. 
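// Example (minimal usage sketch, illustrative only; assumes a *StateStore named
// store and hypothetical namespace/job identifiers): summing per-task-group
// counts from a job summary. JobSummaryByID returns nil, nil when no summary
// exists for the job, so callers must handle both return values.
func exampleReadJobSummary(store *StateStore, namespace, jobID string) (running, queued int, err error) {
	ws := memdb.NewWatchSet()
	summary, err := store.JobSummaryByID(ws, namespace, jobID)
	if err != nil || summary == nil {
		return 0, 0, err
	}
	for _, tg := range summary.Summary {
		running += tg.Running
		queued += tg.Queued
	}
	return running, queued, nil
}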
2181 func (s *StateStore) JobSummaryByID(ws memdb.WatchSet, namespace, jobID string) (*structs.JobSummary, error) { 2182 txn := s.db.ReadTxn() 2183 2184 watchCh, existing, err := txn.FirstWatch("job_summary", "id", namespace, jobID) 2185 if err != nil { 2186 return nil, err 2187 } 2188 2189 ws.Add(watchCh) 2190 2191 if existing != nil { 2192 summary := existing.(*structs.JobSummary) 2193 return summary, nil 2194 } 2195 2196 return nil, nil 2197 } 2198 2199 // JobSummaries walks the entire job summary table and returns all the job 2200 // summary objects 2201 func (s *StateStore) JobSummaries(ws memdb.WatchSet) (memdb.ResultIterator, error) { 2202 txn := s.db.ReadTxn() 2203 2204 iter, err := txn.Get("job_summary", "id") 2205 if err != nil { 2206 return nil, err 2207 } 2208 2209 ws.Add(iter.WatchCh()) 2210 2211 return iter, nil 2212 } 2213 2214 // JobSummaryByPrefix is used to look up Job Summary by id prefix 2215 func (s *StateStore) JobSummaryByPrefix(ws memdb.WatchSet, namespace, id string) (memdb.ResultIterator, error) { 2216 txn := s.db.ReadTxn() 2217 2218 iter, err := txn.Get("job_summary", "id_prefix", namespace, id) 2219 if err != nil { 2220 return nil, fmt.Errorf("job_summary lookup failed: %v", err) 2221 } 2222 2223 ws.Add(iter.WatchCh()) 2224 2225 return iter, nil 2226 } 2227 2228 // UpsertCSIVolume inserts a volume in the state store. 2229 func (s *StateStore) UpsertCSIVolume(index uint64, volumes []*structs.CSIVolume) error { 2230 txn := s.db.WriteTxn(index) 2231 defer txn.Abort() 2232 2233 for _, v := range volumes { 2234 if exists, err := s.namespaceExists(txn, v.Namespace); err != nil { 2235 return err 2236 } else if !exists { 2237 return fmt.Errorf("volume %s is in nonexistent namespace %s", v.ID, v.Namespace) 2238 } 2239 2240 obj, err := txn.First("csi_volumes", "id", v.Namespace, v.ID) 2241 if err != nil { 2242 return fmt.Errorf("volume existence check error: %v", err) 2243 } 2244 if obj != nil { 2245 // Allow some properties of a volume to be updated in place, but 2246 // prevent accidentally overwriting important properties, or 2247 // overwriting a volume in use 2248 old := obj.(*structs.CSIVolume) 2249 if old.ExternalID != v.ExternalID || 2250 old.PluginID != v.PluginID || 2251 old.Provider != v.Provider { 2252 return fmt.Errorf("volume identity cannot be updated: %s", v.ID) 2253 } 2254 s.CSIVolumeDenormalize(nil, old.Copy()) 2255 if old.InUse() { 2256 return fmt.Errorf("volume cannot be updated while in use") 2257 } 2258 2259 v.CreateIndex = old.CreateIndex 2260 v.ModifyIndex = index 2261 } else { 2262 v.CreateIndex = index 2263 v.ModifyIndex = index 2264 } 2265 2266 // Allocations are copy on write, so we want to keep the Allocation ID 2267 // but we need to clear the pointer so that we don't store it when we 2268 // write the volume to the state store. We'll get it from the db in 2269 // denormalize. 2270 for allocID := range v.ReadAllocs { 2271 v.ReadAllocs[allocID] = nil 2272 } 2273 for allocID := range v.WriteAllocs { 2274 v.WriteAllocs[allocID] = nil 2275 } 2276 2277 err = txn.Insert("csi_volumes", v) 2278 if err != nil { 2279 return fmt.Errorf("volume insert: %v", err) 2280 } 2281 } 2282 2283 if err := txn.Insert("index", &IndexEntry{"csi_volumes", index}); err != nil { 2284 return fmt.Errorf("index update failed: %v", err) 2285 } 2286 2287 return txn.Commit() 2288 } 2289 2290 // CSIVolumes returns the unfiltered list of all volumes. Caller should 2291 // snapshot if it wants to also denormalize the plugins. 
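// Example (minimal usage sketch, illustrative only; assumes a *StateStore named
// store): walking the csi_volumes table and denormalizing plugin health onto a
// copy of each volume, since objects returned from the state store must never
// be modified in place.
func exampleListSchedulableVolumes(store *StateStore) ([]string, error) {
	ws := memdb.NewWatchSet()
	iter, err := store.CSIVolumes(ws)
	if err != nil {
		return nil, err
	}

	var ids []string
	for raw := iter.Next(); raw != nil; raw = iter.Next() {
		vol, err := store.CSIVolumeDenormalizePlugins(ws, raw.(*structs.CSIVolume).Copy())
		if err != nil {
			return nil, err
		}
		if vol.Schedulable {
			ids = append(ids, vol.ID)
		}
	}
	return ids, nil
}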
2292 func (s *StateStore) CSIVolumes(ws memdb.WatchSet) (memdb.ResultIterator, error) { 2293 txn := s.db.ReadTxn() 2294 defer txn.Abort() 2295 2296 iter, err := txn.Get("csi_volumes", "id") 2297 if err != nil { 2298 return nil, fmt.Errorf("csi_volumes lookup failed: %v", err) 2299 } 2300 2301 ws.Add(iter.WatchCh()) 2302 2303 return iter, nil 2304 } 2305 2306 // CSIVolumeByID is used to lookup a single volume. Returns a copy of the 2307 // volume because its plugins and allocations are denormalized to provide 2308 // accurate Health. 2309 func (s *StateStore) CSIVolumeByID(ws memdb.WatchSet, namespace, id string) (*structs.CSIVolume, error) { 2310 txn := s.db.ReadTxn() 2311 2312 watchCh, obj, err := txn.FirstWatch("csi_volumes", "id", namespace, id) 2313 if err != nil { 2314 return nil, fmt.Errorf("volume lookup failed for %s: %v", id, err) 2315 } 2316 ws.Add(watchCh) 2317 2318 if obj == nil { 2319 return nil, nil 2320 } 2321 vol := obj.(*structs.CSIVolume) 2322 2323 // we return the volume with the plugins denormalized by default, 2324 // because the scheduler needs them for feasibility checking 2325 return s.csiVolumeDenormalizePluginsTxn(txn, vol.Copy()) 2326 } 2327 2328 // CSIVolumesByPluginID looks up csi_volumes by pluginID. Caller should 2329 // snapshot if it wants to also denormalize the plugins. 2330 func (s *StateStore) CSIVolumesByPluginID(ws memdb.WatchSet, namespace, prefix, pluginID string) (memdb.ResultIterator, error) { 2331 txn := s.db.ReadTxn() 2332 2333 iter, err := txn.Get("csi_volumes", "plugin_id", pluginID) 2334 if err != nil { 2335 return nil, fmt.Errorf("volume lookup failed: %v", err) 2336 } 2337 2338 // Filter the iterator by namespace 2339 f := func(raw interface{}) bool { 2340 v, ok := raw.(*structs.CSIVolume) 2341 if !ok { 2342 return false 2343 } 2344 return v.Namespace != namespace && strings.HasPrefix(v.ID, prefix) 2345 } 2346 2347 wrap := memdb.NewFilterIterator(iter, f) 2348 return wrap, nil 2349 } 2350 2351 // CSIVolumesByIDPrefix supports search. Caller should snapshot if it wants to 2352 // also denormalize the plugins. If using a prefix with the wildcard namespace, 2353 // the results will not use the index prefix. 2354 func (s *StateStore) CSIVolumesByIDPrefix(ws memdb.WatchSet, namespace, volumeID string) (memdb.ResultIterator, error) { 2355 if namespace == structs.AllNamespacesSentinel { 2356 return s.csiVolumeByIDPrefixAllNamespaces(ws, volumeID) 2357 } 2358 2359 txn := s.db.ReadTxn() 2360 2361 iter, err := txn.Get("csi_volumes", "id_prefix", namespace, volumeID) 2362 if err != nil { 2363 return nil, err 2364 } 2365 2366 ws.Add(iter.WatchCh()) 2367 2368 return iter, nil 2369 } 2370 2371 func (s *StateStore) csiVolumeByIDPrefixAllNamespaces(ws memdb.WatchSet, prefix string) (memdb.ResultIterator, error) { 2372 txn := s.db.ReadTxn() 2373 2374 // Walk the entire csi_volumes table 2375 iter, err := txn.Get("csi_volumes", "id") 2376 2377 if err != nil { 2378 return nil, err 2379 } 2380 2381 ws.Add(iter.WatchCh()) 2382 2383 // Filter the iterator by ID prefix 2384 f := func(raw interface{}) bool { 2385 v, ok := raw.(*structs.CSIVolume) 2386 if !ok { 2387 return false 2388 } 2389 return !strings.HasPrefix(v.ID, prefix) 2390 } 2391 wrap := memdb.NewFilterIterator(iter, f) 2392 return wrap, nil 2393 } 2394 2395 // CSIVolumesByNodeID looks up CSIVolumes in use on a node. Caller should 2396 // snapshot if it wants to also denormalize the plugins. 
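// Example (minimal usage sketch, illustrative only; assumes a *StateStore named
// store and a hypothetical volume ID): looking up a single volume.
// CSIVolumeByID already returns a plugin-denormalized copy, so the result is
// safe to inspect directly.
func exampleVolumeLookup(store *StateStore, namespace, volID string) (bool, error) {
	ws := memdb.NewWatchSet()
	vol, err := store.CSIVolumeByID(ws, namespace, volID)
	if err != nil || vol == nil {
		return false, err
	}
	// Plugin health has already been folded onto the returned copy.
	return vol.Schedulable, nil
}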
2397 func (s *StateStore) CSIVolumesByNodeID(ws memdb.WatchSet, prefix, nodeID string) (memdb.ResultIterator, error) { 2398 allocs, err := s.AllocsByNode(ws, nodeID) 2399 if err != nil { 2400 return nil, fmt.Errorf("alloc lookup failed: %v", err) 2401 } 2402 2403 // Find volume ids for CSI volumes in running allocs, or allocs that we desire to run 2404 ids := map[string]string{} // Map volumeID to Namespace 2405 for _, a := range allocs { 2406 tg := a.Job.LookupTaskGroup(a.TaskGroup) 2407 2408 if !(a.DesiredStatus == structs.AllocDesiredStatusRun || 2409 a.ClientStatus == structs.AllocClientStatusRunning) || 2410 len(tg.Volumes) == 0 { 2411 continue 2412 } 2413 2414 for _, v := range tg.Volumes { 2415 if v.Type != structs.VolumeTypeCSI { 2416 continue 2417 } 2418 ids[v.Source] = a.Namespace 2419 } 2420 } 2421 2422 // Lookup the raw CSIVolumes to match the other list interfaces 2423 iter := NewSliceIterator() 2424 txn := s.db.ReadTxn() 2425 for id, namespace := range ids { 2426 if strings.HasPrefix(id, prefix) { 2427 watchCh, raw, err := txn.FirstWatch("csi_volumes", "id", namespace, id) 2428 if err != nil { 2429 return nil, fmt.Errorf("volume lookup failed: %s %v", id, err) 2430 } 2431 ws.Add(watchCh) 2432 iter.Add(raw) 2433 } 2434 } 2435 2436 return iter, nil 2437 } 2438 2439 // CSIVolumesByNamespace looks up the entire csi_volumes table 2440 func (s *StateStore) CSIVolumesByNamespace(ws memdb.WatchSet, namespace, prefix string) (memdb.ResultIterator, error) { 2441 txn := s.db.ReadTxn() 2442 2443 return s.csiVolumesByNamespaceImpl(txn, ws, namespace, prefix) 2444 } 2445 2446 func (s *StateStore) csiVolumesByNamespaceImpl(txn *txn, ws memdb.WatchSet, namespace, prefix string) (memdb.ResultIterator, error) { 2447 2448 iter, err := txn.Get("csi_volumes", "id_prefix", namespace, prefix) 2449 if err != nil { 2450 return nil, fmt.Errorf("volume lookup failed: %v", err) 2451 } 2452 2453 ws.Add(iter.WatchCh()) 2454 2455 return iter, nil 2456 } 2457 2458 // CSIVolumeClaim updates the volume's claim count and allocation list 2459 func (s *StateStore) CSIVolumeClaim(index uint64, namespace, id string, claim *structs.CSIVolumeClaim) error { 2460 txn := s.db.WriteTxn(index) 2461 defer txn.Abort() 2462 2463 row, err := txn.First("csi_volumes", "id", namespace, id) 2464 if err != nil { 2465 return fmt.Errorf("volume lookup failed: %s: %v", id, err) 2466 } 2467 if row == nil { 2468 return fmt.Errorf("volume not found: %s", id) 2469 } 2470 2471 orig, ok := row.(*structs.CSIVolume) 2472 if !ok { 2473 return fmt.Errorf("volume row conversion error") 2474 } 2475 2476 var alloc *structs.Allocation 2477 if claim.State == structs.CSIVolumeClaimStateTaken { 2478 alloc, err = s.allocByIDImpl(txn, nil, claim.AllocationID) 2479 if err != nil { 2480 s.logger.Error("AllocByID failed", "error", err) 2481 return fmt.Errorf(structs.ErrUnknownAllocationPrefix) 2482 } 2483 if alloc == nil { 2484 s.logger.Error("AllocByID failed to find alloc", "alloc_id", claim.AllocationID) 2485 if err != nil { 2486 return fmt.Errorf(structs.ErrUnknownAllocationPrefix) 2487 } 2488 } 2489 } 2490 2491 volume, err := s.csiVolumeDenormalizePluginsTxn(txn, orig.Copy()) 2492 if err != nil { 2493 return err 2494 } 2495 volume, err = s.csiVolumeDenormalizeTxn(txn, nil, volume) 2496 if err != nil { 2497 return err 2498 } 2499 2500 // in the case of a job deregistration, there will be no allocation ID 2501 // for the claim but we still want to write an updated index to the volume 2502 // so that volume reaping is triggered 2503 if 
claim.AllocationID != "" { 2504 err = volume.Claim(claim, alloc) 2505 if err != nil { 2506 return err 2507 } 2508 } 2509 2510 volume.ModifyIndex = index 2511 2512 // Allocations are copy on write, so we want to keep the Allocation ID 2513 // but we need to clear the pointer so that we don't store it when we 2514 // write the volume to the state store. We'll get it from the db in 2515 // denormalize. 2516 for allocID := range volume.ReadAllocs { 2517 volume.ReadAllocs[allocID] = nil 2518 } 2519 for allocID := range volume.WriteAllocs { 2520 volume.WriteAllocs[allocID] = nil 2521 } 2522 2523 if err = txn.Insert("csi_volumes", volume); err != nil { 2524 return fmt.Errorf("volume update failed: %s: %v", id, err) 2525 } 2526 2527 if err = txn.Insert("index", &IndexEntry{"csi_volumes", index}); err != nil { 2528 return fmt.Errorf("index update failed: %v", err) 2529 } 2530 2531 return txn.Commit() 2532 } 2533 2534 // CSIVolumeDeregister removes the volume from the server 2535 func (s *StateStore) CSIVolumeDeregister(index uint64, namespace string, ids []string, force bool) error { 2536 txn := s.db.WriteTxn(index) 2537 defer txn.Abort() 2538 2539 for _, id := range ids { 2540 existing, err := txn.First("csi_volumes", "id", namespace, id) 2541 if err != nil { 2542 return fmt.Errorf("volume lookup failed: %s: %v", id, err) 2543 } 2544 2545 if existing == nil { 2546 return fmt.Errorf("volume not found: %s", id) 2547 } 2548 2549 vol, ok := existing.(*structs.CSIVolume) 2550 if !ok { 2551 return fmt.Errorf("volume row conversion error: %s", id) 2552 } 2553 2554 // The common case for a volume deregister is when the volume is 2555 // unused, but we can also let an operator intervene in the case where 2556 // allocations have been stopped but claims can't be freed because 2557 // ex. the plugins have all been removed. 2558 if vol.InUse() { 2559 if !force || !s.volSafeToForce(txn, vol) { 2560 return fmt.Errorf("volume in use: %s", id) 2561 } 2562 } 2563 2564 if err = txn.Delete("csi_volumes", existing); err != nil { 2565 return fmt.Errorf("volume delete failed: %s: %v", id, err) 2566 } 2567 } 2568 2569 if err := txn.Insert("index", &IndexEntry{"csi_volumes", index}); err != nil { 2570 return fmt.Errorf("index update failed: %v", err) 2571 } 2572 2573 return txn.Commit() 2574 } 2575 2576 // volSafeToForce checks if the any of the remaining allocations 2577 // are in a non-terminal state. 2578 func (s *StateStore) volSafeToForce(txn Txn, v *structs.CSIVolume) bool { 2579 vol, err := s.csiVolumeDenormalizeTxn(txn, nil, v) 2580 if err != nil { 2581 return false 2582 } 2583 2584 for _, alloc := range vol.ReadAllocs { 2585 if alloc != nil && !alloc.TerminalStatus() { 2586 return false 2587 } 2588 } 2589 for _, alloc := range vol.WriteAllocs { 2590 if alloc != nil && !alloc.TerminalStatus() { 2591 return false 2592 } 2593 } 2594 return true 2595 } 2596 2597 // CSIVolumeDenormalizePlugins returns a CSIVolume with current health and 2598 // plugins, but without allocations. 2599 // Use this for current volume metadata, handling lists of volumes. 2600 // Use CSIVolumeDenormalize for volumes containing both health and current 2601 // allocations. 
2602 func (s *StateStore) CSIVolumeDenormalizePlugins(ws memdb.WatchSet, vol *structs.CSIVolume) (*structs.CSIVolume, error) { 2603 if vol == nil { 2604 return nil, nil 2605 } 2606 txn := s.db.ReadTxn() 2607 defer txn.Abort() 2608 return s.csiVolumeDenormalizePluginsTxn(txn, vol) 2609 } 2610 2611 // csiVolumeDenormalizePluginsTxn implements 2612 // CSIVolumeDenormalizePlugins, inside a transaction. 2613 func (s *StateStore) csiVolumeDenormalizePluginsTxn(txn Txn, vol *structs.CSIVolume) (*structs.CSIVolume, error) { 2614 if vol == nil { 2615 return nil, nil 2616 } 2617 plug, err := s.CSIPluginByIDTxn(txn, nil, vol.PluginID) 2618 if err != nil { 2619 return nil, fmt.Errorf("plugin lookup error: %s %v", vol.PluginID, err) 2620 } 2621 if plug == nil { 2622 vol.ControllersHealthy = 0 2623 vol.NodesHealthy = 0 2624 vol.Schedulable = false 2625 return vol, nil 2626 } 2627 2628 vol.Provider = plug.Provider 2629 vol.ProviderVersion = plug.Version 2630 vol.ControllerRequired = plug.ControllerRequired 2631 vol.ControllersHealthy = plug.ControllersHealthy 2632 vol.NodesHealthy = plug.NodesHealthy 2633 2634 // This value may be stale, but stale is ok 2635 vol.ControllersExpected = plug.ControllersExpected 2636 vol.NodesExpected = plug.NodesExpected 2637 2638 vol.Schedulable = vol.NodesHealthy > 0 2639 if vol.ControllerRequired { 2640 vol.Schedulable = vol.ControllersHealthy > 0 && vol.Schedulable 2641 } 2642 2643 return vol, nil 2644 } 2645 2646 // CSIVolumeDenormalize returns a CSIVolume with its current 2647 // Allocations and Claims, including creating new PastClaims for 2648 // terminal or garbage collected allocations. This ensures we have a 2649 // consistent state. Note that it mutates the original volume and so 2650 // should always be called on a Copy after reading from the state 2651 // store. 2652 func (s *StateStore) CSIVolumeDenormalize(ws memdb.WatchSet, vol *structs.CSIVolume) (*structs.CSIVolume, error) { 2653 txn := s.db.ReadTxn() 2654 return s.csiVolumeDenormalizeTxn(txn, ws, vol) 2655 } 2656 2657 // csiVolumeDenormalizeTxn implements CSIVolumeDenormalize inside a transaction 2658 func (s *StateStore) csiVolumeDenormalizeTxn(txn Txn, ws memdb.WatchSet, vol *structs.CSIVolume) (*structs.CSIVolume, error) { 2659 if vol == nil { 2660 return nil, nil 2661 } 2662 2663 // note: denormalize mutates the maps we pass in! 2664 denormalize := func( 2665 currentAllocs map[string]*structs.Allocation, 2666 currentClaims, pastClaims map[string]*structs.CSIVolumeClaim, 2667 fallbackMode structs.CSIVolumeClaimMode) error { 2668 2669 for id := range currentAllocs { 2670 a, err := s.allocByIDImpl(txn, ws, id) 2671 if err != nil { 2672 return err 2673 } 2674 pastClaim := pastClaims[id] 2675 currentClaim := currentClaims[id] 2676 if currentClaim == nil { 2677 // COMPAT(1.4.0): the CSIVolumeClaim fields were added 2678 // after 0.11.1, so claims made before that may be 2679 // missing this value. No clusters should see this 2680 // anymore, so warn nosily in the logs so that 2681 // operators ask us about it. 
Remove this block and 2682 // the now-unused fallbackMode parameter, and return 2683 // an error if currentClaim is nil in 1.4.0 2684 s.logger.Warn("volume was missing claim for allocation", 2685 "volume_id", vol.ID, "alloc", id) 2686 currentClaim = &structs.CSIVolumeClaim{ 2687 AllocationID: a.ID, 2688 NodeID: a.NodeID, 2689 Mode: fallbackMode, 2690 State: structs.CSIVolumeClaimStateTaken, 2691 } 2692 currentClaims[id] = currentClaim 2693 } 2694 2695 currentAllocs[id] = a 2696 if (a == nil || a.TerminalStatus()) && pastClaim == nil { 2697 // the alloc is garbage collected but nothing has written a PastClaim, 2698 // so create one now 2699 pastClaim = &structs.CSIVolumeClaim{ 2700 AllocationID: id, 2701 NodeID: currentClaim.NodeID, 2702 Mode: currentClaim.Mode, 2703 State: structs.CSIVolumeClaimStateUnpublishing, 2704 AccessMode: currentClaim.AccessMode, 2705 AttachmentMode: currentClaim.AttachmentMode, 2706 } 2707 pastClaims[id] = pastClaim 2708 } 2709 2710 } 2711 return nil 2712 } 2713 2714 err := denormalize(vol.ReadAllocs, vol.ReadClaims, vol.PastClaims, 2715 structs.CSIVolumeClaimRead) 2716 if err != nil { 2717 return nil, err 2718 } 2719 err = denormalize(vol.WriteAllocs, vol.WriteClaims, vol.PastClaims, 2720 structs.CSIVolumeClaimWrite) 2721 if err != nil { 2722 return nil, err 2723 } 2724 2725 // COMPAT: the AccessMode and AttachmentMode fields were added to claims 2726 // in 1.1.0, so claims made before that may be missing this value. In this 2727 // case, the volume will already have AccessMode/AttachmentMode until it 2728 // no longer has any claims, so set from those values 2729 for _, claim := range vol.ReadClaims { 2730 if claim.AccessMode == "" || claim.AttachmentMode == "" { 2731 claim.AccessMode = vol.AccessMode 2732 claim.AttachmentMode = vol.AttachmentMode 2733 } 2734 } 2735 for _, claim := range vol.WriteClaims { 2736 if claim.AccessMode == "" || claim.AttachmentMode == "" { 2737 claim.AccessMode = vol.AccessMode 2738 claim.AttachmentMode = vol.AttachmentMode 2739 } 2740 } 2741 2742 return vol, nil 2743 } 2744 2745 // CSIPlugins returns the unfiltered list of all plugin health status 2746 func (s *StateStore) CSIPlugins(ws memdb.WatchSet) (memdb.ResultIterator, error) { 2747 txn := s.db.ReadTxn() 2748 defer txn.Abort() 2749 2750 iter, err := txn.Get("csi_plugins", "id") 2751 if err != nil { 2752 return nil, fmt.Errorf("csi_plugins lookup failed: %v", err) 2753 } 2754 2755 ws.Add(iter.WatchCh()) 2756 2757 return iter, nil 2758 } 2759 2760 // CSIPluginsByIDPrefix supports search 2761 func (s *StateStore) CSIPluginsByIDPrefix(ws memdb.WatchSet, pluginID string) (memdb.ResultIterator, error) { 2762 txn := s.db.ReadTxn() 2763 2764 iter, err := txn.Get("csi_plugins", "id_prefix", pluginID) 2765 if err != nil { 2766 return nil, err 2767 } 2768 2769 ws.Add(iter.WatchCh()) 2770 2771 return iter, nil 2772 } 2773 2774 // CSIPluginByID returns a named CSIPlugin. This method creates a new 2775 // transaction so you should not call it from within another transaction. 
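// Example (minimal usage sketch, illustrative only; assumes a *StateStore named
// store and a hypothetical plugin ID): fetching a plugin and attaching its
// allocation stubs. CSIPluginDenormalize is given a copy because it mutates
// its argument.
func examplePluginWithAllocs(store *StateStore, pluginID string) (*structs.CSIPlugin, error) {
	ws := memdb.NewWatchSet()
	plug, err := store.CSIPluginByID(ws, pluginID)
	if err != nil || plug == nil {
		return nil, err
	}
	return store.CSIPluginDenormalize(ws, plug.Copy())
}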
2776 func (s *StateStore) CSIPluginByID(ws memdb.WatchSet, id string) (*structs.CSIPlugin, error) { 2777 txn := s.db.ReadTxn() 2778 plugin, err := s.CSIPluginByIDTxn(txn, ws, id) 2779 if err != nil { 2780 return nil, err 2781 } 2782 return plugin, nil 2783 } 2784 2785 // CSIPluginByIDTxn returns a named CSIPlugin 2786 func (s *StateStore) CSIPluginByIDTxn(txn Txn, ws memdb.WatchSet, id string) (*structs.CSIPlugin, error) { 2787 2788 watchCh, obj, err := txn.FirstWatch("csi_plugins", "id", id) 2789 if err != nil { 2790 return nil, fmt.Errorf("csi_plugin lookup failed: %s %v", id, err) 2791 } 2792 2793 ws.Add(watchCh) 2794 2795 if obj != nil { 2796 return obj.(*structs.CSIPlugin), nil 2797 } 2798 return nil, nil 2799 } 2800 2801 // CSIPluginDenormalize returns a CSIPlugin with allocation details. Always called on a copy of the plugin. 2802 func (s *StateStore) CSIPluginDenormalize(ws memdb.WatchSet, plug *structs.CSIPlugin) (*structs.CSIPlugin, error) { 2803 txn := s.db.ReadTxn() 2804 return s.CSIPluginDenormalizeTxn(txn, ws, plug) 2805 } 2806 2807 func (s *StateStore) CSIPluginDenormalizeTxn(txn Txn, ws memdb.WatchSet, plug *structs.CSIPlugin) (*structs.CSIPlugin, error) { 2808 if plug == nil { 2809 return nil, nil 2810 } 2811 2812 // Get the unique list of allocation ids 2813 ids := map[string]struct{}{} 2814 for _, info := range plug.Controllers { 2815 ids[info.AllocID] = struct{}{} 2816 } 2817 for _, info := range plug.Nodes { 2818 ids[info.AllocID] = struct{}{} 2819 } 2820 2821 for id := range ids { 2822 alloc, err := s.allocByIDImpl(txn, ws, id) 2823 if err != nil { 2824 return nil, err 2825 } 2826 if alloc == nil { 2827 continue 2828 } 2829 plug.Allocations = append(plug.Allocations, alloc.Stub(nil)) 2830 } 2831 sort.Slice(plug.Allocations, func(i, j int) bool { 2832 return plug.Allocations[i].ModifyIndex > plug.Allocations[j].ModifyIndex 2833 }) 2834 2835 return plug, nil 2836 } 2837 2838 // UpsertCSIPlugin writes the plugin to the state store. Note: there 2839 // is currently no raft message for this, as it's intended to support 2840 // testing use cases. 2841 func (s *StateStore) UpsertCSIPlugin(index uint64, plug *structs.CSIPlugin) error { 2842 txn := s.db.WriteTxn(index) 2843 defer txn.Abort() 2844 2845 existing, err := txn.First("csi_plugins", "id", plug.ID) 2846 if err != nil { 2847 return fmt.Errorf("csi_plugin lookup error: %s %v", plug.ID, err) 2848 } 2849 2850 plug.ModifyIndex = index 2851 if existing != nil { 2852 plug.CreateIndex = existing.(*structs.CSIPlugin).CreateIndex 2853 } 2854 2855 err = txn.Insert("csi_plugins", plug) 2856 if err != nil { 2857 return fmt.Errorf("csi_plugins insert error: %v", err) 2858 } 2859 if err := txn.Insert("index", &IndexEntry{"csi_plugins", index}); err != nil { 2860 return fmt.Errorf("index update failed: %v", err) 2861 } 2862 return txn.Commit() 2863 } 2864 2865 // DeleteCSIPlugin deletes the plugin if it's not in use. 
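// Example (minimal usage sketch, illustrative only; assumes a *StateStore named
// store, a hypothetical plugin ID, and the next Raft index): removing a plugin
// record. DeleteCSIPlugin is a no-op if the plugin is already gone and returns
// an error while the plugin is still in use.
func exampleDeletePlugin(store *StateStore, index uint64, pluginID string) error {
	plug, err := store.CSIPluginByID(memdb.NewWatchSet(), pluginID)
	if err != nil {
		return err
	}
	if plug == nil {
		return nil // nothing to delete
	}
	return store.DeleteCSIPlugin(index, pluginID)
}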
2866 func (s *StateStore) DeleteCSIPlugin(index uint64, id string) error { 2867 txn := s.db.WriteTxn(index) 2868 defer txn.Abort() 2869 2870 plug, err := s.CSIPluginByIDTxn(txn, nil, id) 2871 if err != nil { 2872 return err 2873 } 2874 2875 if plug == nil { 2876 return nil 2877 } 2878 2879 plug, err = s.CSIPluginDenormalizeTxn(txn, nil, plug.Copy()) 2880 if err != nil { 2881 return err 2882 } 2883 if !plug.IsEmpty() { 2884 return fmt.Errorf("plugin in use") 2885 } 2886 2887 err = txn.Delete("csi_plugins", plug) 2888 if err != nil { 2889 return fmt.Errorf("csi_plugins delete error: %v", err) 2890 } 2891 return txn.Commit() 2892 } 2893 2894 // UpsertPeriodicLaunch is used to register a launch or update it. 2895 func (s *StateStore) UpsertPeriodicLaunch(index uint64, launch *structs.PeriodicLaunch) error { 2896 txn := s.db.WriteTxn(index) 2897 defer txn.Abort() 2898 2899 // Check if the job already exists 2900 existing, err := txn.First("periodic_launch", "id", launch.Namespace, launch.ID) 2901 if err != nil { 2902 return fmt.Errorf("periodic launch lookup failed: %v", err) 2903 } 2904 2905 // Setup the indexes correctly 2906 if existing != nil { 2907 launch.CreateIndex = existing.(*structs.PeriodicLaunch).CreateIndex 2908 launch.ModifyIndex = index 2909 } else { 2910 launch.CreateIndex = index 2911 launch.ModifyIndex = index 2912 } 2913 2914 // Insert the job 2915 if err := txn.Insert("periodic_launch", launch); err != nil { 2916 return fmt.Errorf("launch insert failed: %v", err) 2917 } 2918 if err := txn.Insert("index", &IndexEntry{"periodic_launch", index}); err != nil { 2919 return fmt.Errorf("index update failed: %v", err) 2920 } 2921 2922 return txn.Commit() 2923 } 2924 2925 // DeletePeriodicLaunch is used to delete the periodic launch 2926 func (s *StateStore) DeletePeriodicLaunch(index uint64, namespace, jobID string) error { 2927 txn := s.db.WriteTxn(index) 2928 defer txn.Abort() 2929 2930 err := s.DeletePeriodicLaunchTxn(index, namespace, jobID, txn) 2931 if err == nil { 2932 return txn.Commit() 2933 } 2934 return err 2935 } 2936 2937 // DeletePeriodicLaunchTxn is used to delete the periodic launch, like DeletePeriodicLaunch 2938 // but in a transaction. Useful for when making multiple modifications atomically 2939 func (s *StateStore) DeletePeriodicLaunchTxn(index uint64, namespace, jobID string, txn Txn) error { 2940 // Lookup the launch 2941 existing, err := txn.First("periodic_launch", "id", namespace, jobID) 2942 if err != nil { 2943 return fmt.Errorf("launch lookup failed: %v", err) 2944 } 2945 if existing == nil { 2946 return fmt.Errorf("launch not found") 2947 } 2948 2949 // Delete the launch 2950 if err := txn.Delete("periodic_launch", existing); err != nil { 2951 return fmt.Errorf("launch delete failed: %v", err) 2952 } 2953 if err := txn.Insert("index", &IndexEntry{"periodic_launch", index}); err != nil { 2954 return fmt.Errorf("index update failed: %v", err) 2955 } 2956 2957 return nil 2958 } 2959 2960 // PeriodicLaunchByID is used to lookup a periodic launch by the periodic job 2961 // ID. 
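// Example (minimal usage sketch, illustrative only; assumes a *StateStore named
// store, the next Raft index, and that structs.PeriodicLaunch carries a Launch
// time.Time field as used by the periodic dispatcher): recording when a
// periodic job was last launched.
func exampleRecordPeriodicLaunch(store *StateStore, index uint64, namespace, jobID string) error {
	launch := &structs.PeriodicLaunch{
		ID:        jobID,
		Namespace: namespace,
		Launch:    time.Now(),
	}
	return store.UpsertPeriodicLaunch(index, launch)
}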
2962 func (s *StateStore) PeriodicLaunchByID(ws memdb.WatchSet, namespace, id string) (*structs.PeriodicLaunch, error) { 2963 txn := s.db.ReadTxn() 2964 2965 watchCh, existing, err := txn.FirstWatch("periodic_launch", "id", namespace, id) 2966 if err != nil { 2967 return nil, fmt.Errorf("periodic launch lookup failed: %v", err) 2968 } 2969 2970 ws.Add(watchCh) 2971 2972 if existing != nil { 2973 return existing.(*structs.PeriodicLaunch), nil 2974 } 2975 return nil, nil 2976 } 2977 2978 // PeriodicLaunches returns an iterator over all the periodic launches 2979 func (s *StateStore) PeriodicLaunches(ws memdb.WatchSet) (memdb.ResultIterator, error) { 2980 txn := s.db.ReadTxn() 2981 2982 // Walk the entire table 2983 iter, err := txn.Get("periodic_launch", "id") 2984 if err != nil { 2985 return nil, err 2986 } 2987 2988 ws.Add(iter.WatchCh()) 2989 2990 return iter, nil 2991 } 2992 2993 // UpsertEvals is used to upsert a set of evaluations 2994 func (s *StateStore) UpsertEvals(msgType structs.MessageType, index uint64, evals []*structs.Evaluation) error { 2995 txn := s.db.WriteTxnMsgT(msgType, index) 2996 defer txn.Abort() 2997 2998 err := s.UpsertEvalsTxn(index, evals, txn) 2999 if err == nil { 3000 return txn.Commit() 3001 } 3002 return err 3003 } 3004 3005 // UpsertEvalsTxn is used to upsert a set of evaluations, like UpsertEvals but 3006 // in a transaction. Useful for when making multiple modifications atomically. 3007 func (s *StateStore) UpsertEvalsTxn(index uint64, evals []*structs.Evaluation, txn Txn) error { 3008 // Do a nested upsert 3009 jobs := make(map[structs.NamespacedID]string, len(evals)) 3010 for _, eval := range evals { 3011 if err := s.nestedUpsertEval(txn, index, eval); err != nil { 3012 return err 3013 } 3014 3015 tuple := structs.NamespacedID{ 3016 ID: eval.JobID, 3017 Namespace: eval.Namespace, 3018 } 3019 jobs[tuple] = "" 3020 } 3021 3022 // Set the job's status 3023 if err := s.setJobStatuses(index, txn, jobs, false); err != nil { 3024 return fmt.Errorf("setting job status failed: %v", err) 3025 } 3026 3027 return nil 3028 } 3029 3030 // nestedUpsertEvaluation is used to nest an evaluation upsert within a transaction 3031 func (s *StateStore) nestedUpsertEval(txn *txn, index uint64, eval *structs.Evaluation) error { 3032 // Lookup the evaluation 3033 existing, err := txn.First("evals", "id", eval.ID) 3034 if err != nil { 3035 return fmt.Errorf("eval lookup failed: %v", err) 3036 } 3037 3038 // Update the indexes 3039 if existing != nil { 3040 eval.CreateIndex = existing.(*structs.Evaluation).CreateIndex 3041 eval.ModifyIndex = index 3042 } else { 3043 eval.CreateIndex = index 3044 eval.ModifyIndex = index 3045 } 3046 3047 // Update the job summary 3048 summaryRaw, err := txn.First("job_summary", "id", eval.Namespace, eval.JobID) 3049 if err != nil { 3050 return fmt.Errorf("job summary lookup failed: %v", err) 3051 } 3052 if summaryRaw != nil { 3053 js := summaryRaw.(*structs.JobSummary).Copy() 3054 hasSummaryChanged := false 3055 for tg, num := range eval.QueuedAllocations { 3056 if summary, ok := js.Summary[tg]; ok { 3057 if summary.Queued != num { 3058 summary.Queued = num 3059 js.Summary[tg] = summary 3060 hasSummaryChanged = true 3061 } 3062 } else { 3063 s.logger.Error("unable to update queued for job and task group", "job_id", eval.JobID, "task_group", tg, "namespace", eval.Namespace) 3064 } 3065 } 3066 3067 // Insert the job summary 3068 if hasSummaryChanged { 3069 js.ModifyIndex = index 3070 if err := txn.Insert("job_summary", js); err != nil { 3071 return 
fmt.Errorf("job summary insert failed: %v", err) 3072 } 3073 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 3074 return fmt.Errorf("index update failed: %v", err) 3075 } 3076 } 3077 } 3078 3079 // Check if the job has any blocked evaluations and cancel them 3080 if eval.Status == structs.EvalStatusComplete && len(eval.FailedTGAllocs) == 0 { 3081 // Get the blocked evaluation for a job if it exists 3082 iter, err := txn.Get("evals", "job", eval.Namespace, eval.JobID, structs.EvalStatusBlocked) 3083 if err != nil { 3084 return fmt.Errorf("failed to get blocked evals for job %q in namespace %q: %v", eval.JobID, eval.Namespace, err) 3085 } 3086 3087 var blocked []*structs.Evaluation 3088 for { 3089 raw := iter.Next() 3090 if raw == nil { 3091 break 3092 } 3093 blocked = append(blocked, raw.(*structs.Evaluation)) 3094 } 3095 3096 // Go through and update the evals 3097 for _, eval := range blocked { 3098 newEval := eval.Copy() 3099 newEval.Status = structs.EvalStatusCancelled 3100 newEval.StatusDescription = fmt.Sprintf("evaluation %q successful", newEval.ID) 3101 newEval.ModifyIndex = index 3102 3103 if err := txn.Insert("evals", newEval); err != nil { 3104 return fmt.Errorf("eval insert failed: %v", err) 3105 } 3106 } 3107 } 3108 3109 // Insert the eval 3110 if err := txn.Insert("evals", eval); err != nil { 3111 return fmt.Errorf("eval insert failed: %v", err) 3112 } 3113 if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil { 3114 return fmt.Errorf("index update failed: %v", err) 3115 } 3116 return nil 3117 } 3118 3119 // updateEvalModifyIndex is used to update the modify index of an evaluation that has been 3120 // through a scheduler pass. This is done as part of plan apply. It ensures that when a subsequent 3121 // scheduler workers process a re-queued evaluation it sees any partial updates from the plan apply. 3122 func (s *StateStore) updateEvalModifyIndex(txn *txn, index uint64, evalID string) error { 3123 // Lookup the evaluation 3124 existing, err := txn.First("evals", "id", evalID) 3125 if err != nil { 3126 return fmt.Errorf("eval lookup failed: %v", err) 3127 } 3128 if existing == nil { 3129 s.logger.Error("unable to find eval", "eval_id", evalID) 3130 return fmt.Errorf("unable to find eval id %q", evalID) 3131 } 3132 eval := existing.(*structs.Evaluation).Copy() 3133 // Update the indexes 3134 eval.ModifyIndex = index 3135 3136 // Insert the eval 3137 if err := txn.Insert("evals", eval); err != nil { 3138 return fmt.Errorf("eval insert failed: %v", err) 3139 } 3140 if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil { 3141 return fmt.Errorf("index update failed: %v", err) 3142 } 3143 return nil 3144 } 3145 3146 // DeleteEvalsByFilter is used to delete all evals that are both safe to delete 3147 // and match a filter. 3148 func (s *StateStore) DeleteEvalsByFilter(index uint64, filterExpr string, pageToken string, perPage int32) error { 3149 txn := s.db.WriteTxn(index) 3150 defer txn.Abort() 3151 3152 // These are always user-initiated, so ensure the eval broker is paused. 
3153 _, schedConfig, err := s.schedulerConfigTxn(txn) 3154 if err != nil { 3155 return err 3156 } 3157 if schedConfig == nil || !schedConfig.PauseEvalBroker { 3158 return errors.New("eval broker is enabled; eval broker must be paused to delete evals") 3159 } 3160 3161 filter, err := bexpr.CreateEvaluator(filterExpr) 3162 if err != nil { 3163 return err 3164 } 3165 3166 iter, err := s.Evals(nil, SortDefault) 3167 if err != nil { 3168 return fmt.Errorf("failed to lookup evals: %v", err) 3169 } 3170 3171 // Note: Paginator imports this package for testing so we can't just use 3172 // Paginator 3173 pageCount := int32(0) 3174 3175 for { 3176 if pageCount >= perPage { 3177 break 3178 } 3179 raw := iter.Next() 3180 if raw == nil { 3181 break 3182 } 3183 eval := raw.(*structs.Evaluation) 3184 if eval.ID < pageToken { 3185 continue 3186 } 3187 3188 deleteOk, err := s.EvalIsUserDeleteSafe(nil, eval) 3189 if !deleteOk || err != nil { 3190 continue 3191 } 3192 match, err := filter.Evaluate(eval) 3193 if !match || err != nil { 3194 continue 3195 } 3196 if err := txn.Delete("evals", eval); err != nil { 3197 return fmt.Errorf("eval delete failed: %v", err) 3198 } 3199 pageCount++ 3200 } 3201 3202 err = txn.Commit() 3203 return err 3204 } 3205 3206 // EvalIsUserDeleteSafe ensures an evaluation is safe to delete based on its 3207 // related allocation and job information. This follows similar, but different 3208 // rules to the eval reap checking, to ensure evaluations for running allocs or 3209 // allocs which need the evaluation detail are not deleted. 3210 // 3211 // Returns both a bool and an error so that error in querying the related 3212 // objects can be differentiated from reporting that the eval isn't safe to 3213 // delete. 3214 func (s *StateStore) EvalIsUserDeleteSafe(ws memdb.WatchSet, eval *structs.Evaluation) (bool, error) { 3215 3216 job, err := s.JobByID(ws, eval.Namespace, eval.JobID) 3217 if err != nil { 3218 return false, fmt.Errorf("failed to lookup job for eval: %v", err) 3219 } 3220 3221 allocs, err := s.AllocsByEval(ws, eval.ID) 3222 if err != nil { 3223 return false, fmt.Errorf("failed to lookup eval allocs: %v", err) 3224 } 3225 3226 return isEvalDeleteSafe(allocs, job), nil 3227 } 3228 3229 func isEvalDeleteSafe(allocs []*structs.Allocation, job *structs.Job) bool { 3230 3231 // If the job is deleted, stopped, or dead, all allocs are terminal and 3232 // the eval can be deleted. 3233 if job == nil || job.Stop || job.Status == structs.JobStatusDead { 3234 return true 3235 } 3236 3237 // Iterate the allocations associated to the eval, if any, and check 3238 // whether we can delete the eval. 3239 for _, alloc := range allocs { 3240 3241 // If the allocation is still classed as running on the client, or 3242 // might be, we can't delete. 3243 switch alloc.ClientStatus { 3244 case structs.AllocClientStatusRunning, structs.AllocClientStatusUnknown: 3245 return false 3246 } 3247 3248 // If the alloc hasn't failed then we don't need to consider it for 3249 // rescheduling. Rescheduling needs to copy over information from the 3250 // previous alloc so that it can enforce the reschedule policy. 
3251 if alloc.ClientStatus != structs.AllocClientStatusFailed { 3252 continue 3253 } 3254 3255 var reschedulePolicy *structs.ReschedulePolicy 3256 tg := job.LookupTaskGroup(alloc.TaskGroup) 3257 3258 if tg != nil { 3259 reschedulePolicy = tg.ReschedulePolicy 3260 } 3261 3262 // No reschedule policy or rescheduling is disabled 3263 if reschedulePolicy == nil || (!reschedulePolicy.Unlimited && reschedulePolicy.Attempts == 0) { 3264 continue 3265 } 3266 3267 // The restart tracking information has not been carried forward. 3268 if alloc.NextAllocation == "" { 3269 return false 3270 } 3271 3272 // This task has unlimited rescheduling and the alloc has not been 3273 // replaced, so we can't delete the eval yet. 3274 if reschedulePolicy.Unlimited { 3275 return false 3276 } 3277 3278 // No restarts have been attempted yet. 3279 if alloc.RescheduleTracker == nil || len(alloc.RescheduleTracker.Events) == 0 { 3280 return false 3281 } 3282 } 3283 3284 return true 3285 } 3286 3287 // DeleteEval is used to delete an evaluation 3288 func (s *StateStore) DeleteEval(index uint64, evals, allocs []string, userInitiated bool) error { 3289 txn := s.db.WriteTxn(index) 3290 defer txn.Abort() 3291 3292 // If this deletion has been initiated by an operator, ensure the eval 3293 // broker is paused. 3294 if userInitiated { 3295 _, schedConfig, err := s.schedulerConfigTxn(txn) 3296 if err != nil { 3297 return err 3298 } 3299 if schedConfig == nil || !schedConfig.PauseEvalBroker { 3300 return errors.New("eval broker is enabled; eval broker must be paused to delete evals") 3301 } 3302 } 3303 3304 jobs := make(map[structs.NamespacedID]string, len(evals)) 3305 3306 // evalsTableUpdated and allocsTableUpdated allow us to track whether each 3307 // table has been modified. This allows us to skip updating the index table 3308 // entries if we do not need to. 3309 var evalsTableUpdated, allocsTableUpdated bool 3310 3311 for _, eval := range evals { 3312 existing, err := txn.First("evals", "id", eval) 3313 if err != nil { 3314 return fmt.Errorf("eval lookup failed: %v", err) 3315 } 3316 if existing == nil { 3317 continue 3318 } 3319 if err := txn.Delete("evals", existing); err != nil { 3320 return fmt.Errorf("eval delete failed: %v", err) 3321 } 3322 3323 // Mark that we have made a successful modification to the evals 3324 // table. 3325 evalsTableUpdated = true 3326 3327 eval := existing.(*structs.Evaluation) 3328 3329 tuple := structs.NamespacedID{ 3330 ID: eval.JobID, 3331 Namespace: eval.Namespace, 3332 } 3333 jobs[tuple] = "" 3334 } 3335 3336 for _, alloc := range allocs { 3337 raw, err := txn.First("allocs", "id", alloc) 3338 if err != nil { 3339 return fmt.Errorf("alloc lookup failed: %v", err) 3340 } 3341 if raw == nil { 3342 continue 3343 } 3344 if err := txn.Delete("allocs", raw); err != nil { 3345 return fmt.Errorf("alloc delete failed: %v", err) 3346 } 3347 3348 // Mark that we have made a successful modification to the allocs 3349 // table. 
3350 allocsTableUpdated = true 3351 } 3352 3353 // Update the indexes 3354 if evalsTableUpdated { 3355 if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil { 3356 return fmt.Errorf("index update failed: %v", err) 3357 } 3358 } 3359 if allocsTableUpdated { 3360 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 3361 return fmt.Errorf("index update failed: %v", err) 3362 } 3363 } 3364 3365 // Set the job's status 3366 if err := s.setJobStatuses(index, txn, jobs, true); err != nil { 3367 return fmt.Errorf("setting job status failed: %v", err) 3368 } 3369 3370 return txn.Commit() 3371 } 3372 3373 // EvalByID is used to lookup an eval by its ID 3374 func (s *StateStore) EvalByID(ws memdb.WatchSet, id string) (*structs.Evaluation, error) { 3375 txn := s.db.ReadTxn() 3376 3377 watchCh, existing, err := txn.FirstWatch("evals", "id", id) 3378 if err != nil { 3379 return nil, fmt.Errorf("eval lookup failed: %v", err) 3380 } 3381 3382 ws.Add(watchCh) 3383 3384 if existing != nil { 3385 return existing.(*structs.Evaluation), nil 3386 } 3387 return nil, nil 3388 } 3389 3390 // EvalsRelatedToID is used to retrieve the evals that are related (next, 3391 // previous, or blocked) to the provided eval ID. 3392 func (s *StateStore) EvalsRelatedToID(ws memdb.WatchSet, id string) ([]*structs.EvaluationStub, error) { 3393 txn := s.db.ReadTxn() 3394 3395 raw, err := txn.First("evals", "id", id) 3396 if err != nil { 3397 return nil, fmt.Errorf("eval lookup failed: %v", err) 3398 } 3399 if raw == nil { 3400 return nil, nil 3401 } 3402 eval := raw.(*structs.Evaluation) 3403 3404 relatedEvals := []*structs.EvaluationStub{} 3405 todo := eval.RelatedIDs() 3406 done := map[string]bool{ 3407 eval.ID: true, // don't place the requested eval in the related list. 3408 } 3409 3410 for len(todo) > 0 { 3411 // Pop the first value from the todo list. 3412 current := todo[0] 3413 todo = todo[1:] 3414 if current == "" { 3415 continue 3416 } 3417 3418 // Skip value if we already have it in the results. 3419 if done[current] { 3420 continue 3421 } 3422 3423 eval, err := s.EvalByID(ws, current) 3424 if err != nil { 3425 return nil, err 3426 } 3427 if eval == nil { 3428 continue 3429 } 3430 3431 todo = append(todo, eval.RelatedIDs()...) 3432 relatedEvals = append(relatedEvals, eval.Stub()) 3433 done[eval.ID] = true 3434 } 3435 3436 return relatedEvals, nil 3437 } 3438 3439 // EvalsByIDPrefix is used to lookup evaluations by prefix in a particular 3440 // namespace 3441 func (s *StateStore) EvalsByIDPrefix(ws memdb.WatchSet, namespace, id string, sort SortOption) (memdb.ResultIterator, error) { 3442 txn := s.db.ReadTxn() 3443 3444 var iter memdb.ResultIterator 3445 var err error 3446 3447 // Get an iterator over all evals by the id prefix 3448 switch sort { 3449 case SortReverse: 3450 iter, err = txn.GetReverse("evals", "id_prefix", id) 3451 default: 3452 iter, err = txn.Get("evals", "id_prefix", id) 3453 } 3454 if err != nil { 3455 return nil, fmt.Errorf("eval lookup failed: %v", err) 3456 } 3457 3458 ws.Add(iter.WatchCh()) 3459 3460 // Wrap the iterator in a filter 3461 wrap := memdb.NewFilterIterator(iter, evalNamespaceFilter(namespace)) 3462 return wrap, nil 3463 } 3464 3465 // evalNamespaceFilter returns a filter function that filters all evaluations 3466 // not in the given namespace. 
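// Example (minimal usage sketch, illustrative only; assumes a *StateStore named
// store and a hypothetical eval ID prefix): iterating evals in a namespace that
// match a prefix, in reverse ID order via SortReverse.
func exampleEvalsByPrefix(store *StateStore, namespace, prefix string) ([]*structs.Evaluation, error) {
	ws := memdb.NewWatchSet()
	iter, err := store.EvalsByIDPrefix(ws, namespace, prefix, SortReverse)
	if err != nil {
		return nil, err
	}
	var out []*structs.Evaluation
	for raw := iter.Next(); raw != nil; raw = iter.Next() {
		out = append(out, raw.(*structs.Evaluation))
	}
	return out, nil
}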
3467 func evalNamespaceFilter(namespace string) func(interface{}) bool { 3468 return func(raw interface{}) bool { 3469 eval, ok := raw.(*structs.Evaluation) 3470 if !ok { 3471 return true 3472 } 3473 3474 return namespace != structs.AllNamespacesSentinel && 3475 eval.Namespace != namespace 3476 } 3477 } 3478 3479 // EvalsByJob returns all the evaluations by job id 3480 func (s *StateStore) EvalsByJob(ws memdb.WatchSet, namespace, jobID string) ([]*structs.Evaluation, error) { 3481 txn := s.db.ReadTxn() 3482 3483 // Get an iterator over the node allocations 3484 iter, err := txn.Get("evals", "job_prefix", namespace, jobID) 3485 if err != nil { 3486 return nil, err 3487 } 3488 3489 ws.Add(iter.WatchCh()) 3490 3491 var out []*structs.Evaluation 3492 for { 3493 raw := iter.Next() 3494 if raw == nil { 3495 break 3496 } 3497 3498 e := raw.(*structs.Evaluation) 3499 3500 // Filter non-exact matches 3501 if e.JobID != jobID { 3502 continue 3503 } 3504 3505 out = append(out, e) 3506 } 3507 return out, nil 3508 } 3509 3510 // Evals returns an iterator over all the evaluations in ascending or descending 3511 // order of CreationIndex as determined by the reverse parameter. 3512 func (s *StateStore) Evals(ws memdb.WatchSet, sort SortOption) (memdb.ResultIterator, error) { 3513 txn := s.db.ReadTxn() 3514 3515 var it memdb.ResultIterator 3516 var err error 3517 3518 switch sort { 3519 case SortReverse: 3520 it, err = txn.GetReverse("evals", "create") 3521 default: 3522 it, err = txn.Get("evals", "create") 3523 } 3524 3525 if err != nil { 3526 return nil, err 3527 } 3528 3529 ws.Add(it.WatchCh()) 3530 3531 return it, nil 3532 } 3533 3534 // EvalsByNamespace returns an iterator over all evaluations in no particular 3535 // order. 3536 // 3537 // todo(shoenig): can this be removed? 3538 func (s *StateStore) EvalsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) { 3539 txn := s.db.ReadTxn() 3540 3541 it, err := txn.Get("evals", "namespace", namespace) 3542 if err != nil { 3543 return nil, err 3544 } 3545 3546 ws.Add(it.WatchCh()) 3547 3548 return it, nil 3549 } 3550 3551 func (s *StateStore) EvalsByNamespaceOrdered(ws memdb.WatchSet, namespace string, sort SortOption) (memdb.ResultIterator, error) { 3552 txn := s.db.ReadTxn() 3553 3554 var ( 3555 it memdb.ResultIterator 3556 err error 3557 exact = terminate(namespace) 3558 ) 3559 3560 switch sort { 3561 case SortReverse: 3562 it, err = txn.GetReverse("evals", "namespace_create_prefix", exact) 3563 default: 3564 it, err = txn.Get("evals", "namespace_create_prefix", exact) 3565 } 3566 3567 if err != nil { 3568 return nil, err 3569 } 3570 3571 ws.Add(it.WatchCh()) 3572 3573 return it, nil 3574 } 3575 3576 // UpdateAllocsFromClient is used to update an allocation based on input 3577 // from a client. While the schedulers are the authority on the allocation for 3578 // most things, some updates are authoritative from the client. Specifically, 3579 // the desired state comes from the schedulers, while the actual state comes 3580 // from clients. 
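// Example (minimal usage sketch, illustrative only; assumes a *StateStore named
// store): a simple blocking read over the evals table using a watch set. The
// set fires when the evals table changes or when this state store is
// abandoned; per go-memdb, WatchSet.Watch returns true if the timeout channel
// fired first.
func exampleWaitForEvalChange(store *StateStore, timeout time.Duration) (bool, error) {
	ws := store.NewWatchSet() // also watches the store's abandon channel
	// Evals adds the evals table's watch channel to ws as a side effect.
	if _, err := store.Evals(ws, SortDefault); err != nil {
		return false, err
	}
	return ws.Watch(time.After(timeout)), nil
}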
3581 func (s *StateStore) UpdateAllocsFromClient(msgType structs.MessageType, index uint64, allocs []*structs.Allocation) error { 3582 txn := s.db.WriteTxnMsgT(msgType, index) 3583 defer txn.Abort() 3584 3585 // Handle each of the updated allocations 3586 for _, alloc := range allocs { 3587 if err := s.nestedUpdateAllocFromClient(txn, index, alloc); err != nil { 3588 return err 3589 } 3590 } 3591 3592 // Update the indexes 3593 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 3594 return fmt.Errorf("index update failed: %v", err) 3595 } 3596 3597 return txn.Commit() 3598 } 3599 3600 // nestedUpdateAllocFromClient is used to nest an update of an allocation with client status 3601 func (s *StateStore) nestedUpdateAllocFromClient(txn *txn, index uint64, alloc *structs.Allocation) error { 3602 // Look for existing alloc 3603 existing, err := txn.First("allocs", "id", alloc.ID) 3604 if err != nil { 3605 return fmt.Errorf("alloc lookup failed: %v", err) 3606 } 3607 3608 // Nothing to do if this does not exist 3609 if existing == nil { 3610 return nil 3611 } 3612 exist := existing.(*structs.Allocation) 3613 3614 // Copy everything from the existing allocation 3615 copyAlloc := exist.Copy() 3616 3617 // Pull in anything the client is the authority on 3618 copyAlloc.ClientStatus = alloc.ClientStatus 3619 copyAlloc.ClientDescription = alloc.ClientDescription 3620 copyAlloc.TaskStates = alloc.TaskStates 3621 copyAlloc.NetworkStatus = alloc.NetworkStatus 3622 3623 // The client can only set its deployment health and timestamp, so just take 3624 // those 3625 if copyAlloc.DeploymentStatus != nil && alloc.DeploymentStatus != nil { 3626 oldHasHealthy := copyAlloc.DeploymentStatus.HasHealth() 3627 newHasHealthy := alloc.DeploymentStatus.HasHealth() 3628 3629 // We got new health information from the client 3630 if newHasHealthy && (!oldHasHealthy || *copyAlloc.DeploymentStatus.Healthy != *alloc.DeploymentStatus.Healthy) { 3631 // Updated deployment health and timestamp 3632 copyAlloc.DeploymentStatus.Healthy = pointer.Of(*alloc.DeploymentStatus.Healthy) 3633 copyAlloc.DeploymentStatus.Timestamp = alloc.DeploymentStatus.Timestamp 3634 copyAlloc.DeploymentStatus.ModifyIndex = index 3635 } 3636 } else if alloc.DeploymentStatus != nil { 3637 // First time getting a deployment status so copy everything and just 3638 // set the index 3639 copyAlloc.DeploymentStatus = alloc.DeploymentStatus.Copy() 3640 copyAlloc.DeploymentStatus.ModifyIndex = index 3641 } 3642 3643 // Update the modify index 3644 copyAlloc.ModifyIndex = index 3645 3646 // Update the modify time 3647 copyAlloc.ModifyTime = alloc.ModifyTime 3648 3649 if err := s.updateDeploymentWithAlloc(index, copyAlloc, exist, txn); err != nil { 3650 return fmt.Errorf("error updating deployment: %v", err) 3651 } 3652 3653 if err := s.updateSummaryWithAlloc(index, copyAlloc, exist, txn); err != nil { 3654 return fmt.Errorf("error updating job summary: %v", err) 3655 } 3656 3657 if err := s.updateEntWithAlloc(index, copyAlloc, exist, txn); err != nil { 3658 return err 3659 } 3660 3661 if err := s.updatePluginForTerminalAlloc(index, copyAlloc, txn); err != nil { 3662 return err 3663 } 3664 3665 // Update the allocation 3666 if err := txn.Insert("allocs", copyAlloc); err != nil { 3667 return fmt.Errorf("alloc insert failed: %v", err) 3668 } 3669 3670 // Set the job's status 3671 forceStatus := "" 3672 if !copyAlloc.TerminalStatus() { 3673 forceStatus = structs.JobStatusRunning 3674 } 3675 3676 tuple := structs.NamespacedID{ 3677 ID: 
exist.JobID,
3678 		Namespace: exist.Namespace,
3679 	}
3680 	jobs := map[structs.NamespacedID]string{tuple: forceStatus}
3681 
3682 	if err := s.setJobStatuses(index, txn, jobs, false); err != nil {
3683 		return fmt.Errorf("setting job status failed: %v", err)
3684 	}
3685 	return nil
3686 }
3687 
3688 // UpsertAllocs is used to evict a set of allocations and allocate new ones at
3689 // the same time.
3690 func (s *StateStore) UpsertAllocs(msgType structs.MessageType, index uint64, allocs []*structs.Allocation) error {
3691 	txn := s.db.WriteTxn(index)
3692 	defer txn.Abort()
3693 	if err := s.upsertAllocsImpl(index, allocs, txn); err != nil {
3694 		return err
3695 	}
3696 	return txn.Commit()
3697 }
3698 
3699 // upsertAllocsImpl is the actual implementation of UpsertAllocs so that it
3700 // may be used with an existing transaction.
3701 func (s *StateStore) upsertAllocsImpl(index uint64, allocs []*structs.Allocation, txn *txn) error {
3702 	// Handle the allocations
3703 	jobs := make(map[structs.NamespacedID]string, 1)
3704 	for _, alloc := range allocs {
3705 		existing, err := txn.First("allocs", "id", alloc.ID)
3706 		if err != nil {
3707 			return fmt.Errorf("alloc lookup failed: %v", err)
3708 		}
3709 		exist, _ := existing.(*structs.Allocation)
3710 
3711 		if exist == nil {
3712 			alloc.CreateIndex = index
3713 			alloc.ModifyIndex = index
3714 			alloc.AllocModifyIndex = index
3715 			if alloc.DeploymentStatus != nil {
3716 				alloc.DeploymentStatus.ModifyIndex = index
3717 			}
3718 
3719 			// Issue https://github.com/hashicorp/nomad/issues/2583 uncovered
3720 			// a race between a forced garbage collection and the scheduler
3721 			// marking an allocation as terminal. The issue is that the
3722 			// allocation from the scheduler has its job normalized and the FSM
3723 			// will only denormalize if the allocation is not terminal. However,
3724 			// if the allocation is garbage collected, that will result in an
3725 			// allocation being upserted for the first time without a job
3726 			// attached. By returning an error here, it will cause the FSM to
3727 			// error, causing the plan_apply to error and thus causing the
3728 			// evaluation to be failed. This will force an index refresh that
3729 			// should solve this issue.
3730 			if alloc.Job == nil {
3731 				return fmt.Errorf("attempting to upsert allocation %q without a job", alloc.ID)
3732 			}
3733 		} else {
3734 			alloc.CreateIndex = exist.CreateIndex
3735 			alloc.ModifyIndex = index
3736 			alloc.AllocModifyIndex = index
3737 
3738 			// Keep the client's task states
3739 			alloc.TaskStates = exist.TaskStates
3740 
3741 			// If the scheduler is marking this allocation as lost or unknown we do not
3742 			// want to reuse the status of the existing allocation.
3743 			if alloc.ClientStatus != structs.AllocClientStatusLost &&
3744 				alloc.ClientStatus != structs.AllocClientStatusUnknown {
3745 				alloc.ClientStatus = exist.ClientStatus
3746 				alloc.ClientDescription = exist.ClientDescription
3747 			}
3748 
3749 			// The job has been denormalized so re-attach the original job
3750 			if alloc.Job == nil {
3751 				alloc.Job = exist.Job
3752 			}
3753 		}
3754 
3755 		// OPTIMIZATION:
3756 		// These should be given a map of new to old allocation and the updates
3757 		// should be done once for all changes.
The current implementation causes O(n) 3758 // lookups/copies/insertions rather than O(1) 3759 if err := s.updateDeploymentWithAlloc(index, alloc, exist, txn); err != nil { 3760 return fmt.Errorf("error updating deployment: %v", err) 3761 } 3762 3763 if err := s.updateSummaryWithAlloc(index, alloc, exist, txn); err != nil { 3764 return fmt.Errorf("error updating job summary: %v", err) 3765 } 3766 3767 if err := s.updateEntWithAlloc(index, alloc, exist, txn); err != nil { 3768 return err 3769 } 3770 3771 if err := s.updatePluginForTerminalAlloc(index, alloc, txn); err != nil { 3772 return err 3773 } 3774 3775 if err := txn.Insert("allocs", alloc); err != nil { 3776 return fmt.Errorf("alloc insert failed: %v", err) 3777 } 3778 3779 if alloc.PreviousAllocation != "" { 3780 prevAlloc, err := txn.First("allocs", "id", alloc.PreviousAllocation) 3781 if err != nil { 3782 return fmt.Errorf("alloc lookup failed: %v", err) 3783 } 3784 existingPrevAlloc, _ := prevAlloc.(*structs.Allocation) 3785 if existingPrevAlloc != nil { 3786 prevAllocCopy := existingPrevAlloc.Copy() 3787 prevAllocCopy.NextAllocation = alloc.ID 3788 prevAllocCopy.ModifyIndex = index 3789 if err := txn.Insert("allocs", prevAllocCopy); err != nil { 3790 return fmt.Errorf("alloc insert failed: %v", err) 3791 } 3792 } 3793 } 3794 3795 // If the allocation is running, force the job to running status. 3796 forceStatus := "" 3797 if !alloc.TerminalStatus() { 3798 forceStatus = structs.JobStatusRunning 3799 } 3800 3801 tuple := structs.NamespacedID{ 3802 ID: alloc.JobID, 3803 Namespace: alloc.Namespace, 3804 } 3805 jobs[tuple] = forceStatus 3806 } 3807 3808 // Update the indexes 3809 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 3810 return fmt.Errorf("index update failed: %v", err) 3811 } 3812 3813 // Set the job's status 3814 if err := s.setJobStatuses(index, txn, jobs, false); err != nil { 3815 return fmt.Errorf("setting job status failed: %v", err) 3816 } 3817 3818 return nil 3819 } 3820 3821 // UpdateAllocsDesiredTransitions is used to update a set of allocations 3822 // desired transitions. 
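//
// Illustrative request shape (not part of the original source): the allocs
// argument maps allocation IDs to the transition to merge, assuming the
// DesiredTransition Migrate field, for example marking one allocation for
// migration:
//
//	transitions := map[string]*structs.DesiredTransition{
//		allocID: {Migrate: pointer.Of(true)},
//	}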
3823 func (s *StateStore) UpdateAllocsDesiredTransitions(msgType structs.MessageType, index uint64, allocs map[string]*structs.DesiredTransition, 3824 evals []*structs.Evaluation) error { 3825 3826 txn := s.db.WriteTxnMsgT(msgType, index) 3827 defer txn.Abort() 3828 3829 // Handle each of the updated allocations 3830 for id, transition := range allocs { 3831 if err := s.UpdateAllocDesiredTransitionTxn(txn, index, id, transition); err != nil { 3832 return err 3833 } 3834 } 3835 3836 for _, eval := range evals { 3837 if err := s.nestedUpsertEval(txn, index, eval); err != nil { 3838 return err 3839 } 3840 } 3841 3842 // Update the indexes 3843 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 3844 return fmt.Errorf("index update failed: %v", err) 3845 } 3846 3847 return txn.Commit() 3848 } 3849 3850 // UpdateAllocDesiredTransitionTxn is used to nest an update of an 3851 // allocations desired transition 3852 func (s *StateStore) UpdateAllocDesiredTransitionTxn( 3853 txn *txn, index uint64, allocID string, 3854 transition *structs.DesiredTransition) error { 3855 3856 // Look for existing alloc 3857 existing, err := txn.First("allocs", "id", allocID) 3858 if err != nil { 3859 return fmt.Errorf("alloc lookup failed: %v", err) 3860 } 3861 3862 // Nothing to do if this does not exist 3863 if existing == nil { 3864 return nil 3865 } 3866 exist := existing.(*structs.Allocation) 3867 3868 // Copy everything from the existing allocation 3869 copyAlloc := exist.Copy() 3870 3871 // Merge the desired transitions 3872 copyAlloc.DesiredTransition.Merge(transition) 3873 3874 // Update the modify indexes 3875 copyAlloc.ModifyIndex = index 3876 copyAlloc.AllocModifyIndex = index 3877 3878 // Update the allocation 3879 if err := txn.Insert("allocs", copyAlloc); err != nil { 3880 return fmt.Errorf("alloc insert failed: %v", err) 3881 } 3882 3883 return nil 3884 } 3885 3886 // AllocByID is used to lookup an allocation by its ID 3887 func (s *StateStore) AllocByID(ws memdb.WatchSet, id string) (*structs.Allocation, error) { 3888 txn := s.db.ReadTxn() 3889 return s.allocByIDImpl(txn, ws, id) 3890 } 3891 3892 // allocByIDImpl retrives an allocation and is called under and existing 3893 // transaction. 
An optional watch set can be passed to add allocations to the 3894 // watch set 3895 func (s *StateStore) allocByIDImpl(txn Txn, ws memdb.WatchSet, id string) (*structs.Allocation, error) { 3896 watchCh, raw, err := txn.FirstWatch("allocs", "id", id) 3897 if err != nil { 3898 return nil, fmt.Errorf("alloc lookup failed: %v", err) 3899 } 3900 3901 ws.Add(watchCh) 3902 3903 if raw == nil { 3904 return nil, nil 3905 } 3906 alloc := raw.(*structs.Allocation) 3907 return alloc, nil 3908 } 3909 3910 // AllocsByIDPrefix is used to lookup allocs by prefix 3911 func (s *StateStore) AllocsByIDPrefix(ws memdb.WatchSet, namespace, id string, sort SortOption) (memdb.ResultIterator, error) { 3912 txn := s.db.ReadTxn() 3913 3914 var iter memdb.ResultIterator 3915 var err error 3916 3917 switch sort { 3918 case SortReverse: 3919 iter, err = txn.GetReverse("allocs", "id_prefix", id) 3920 default: 3921 iter, err = txn.Get("allocs", "id_prefix", id) 3922 } 3923 if err != nil { 3924 return nil, fmt.Errorf("alloc lookup failed: %v", err) 3925 } 3926 3927 ws.Add(iter.WatchCh()) 3928 3929 // Wrap the iterator in a filter 3930 wrap := memdb.NewFilterIterator(iter, allocNamespaceFilter(namespace)) 3931 return wrap, nil 3932 } 3933 3934 // allocNamespaceFilter returns a filter function that filters all allocations 3935 // not in the given namespace. 3936 func allocNamespaceFilter(namespace string) func(interface{}) bool { 3937 return func(raw interface{}) bool { 3938 alloc, ok := raw.(*structs.Allocation) 3939 if !ok { 3940 return true 3941 } 3942 3943 if namespace == structs.AllNamespacesSentinel { 3944 return false 3945 } 3946 3947 return alloc.Namespace != namespace 3948 } 3949 } 3950 3951 // AllocsByIDPrefixAllNSs is used to lookup allocs by prefix. 3952 func (s *StateStore) AllocsByIDPrefixAllNSs(ws memdb.WatchSet, prefix string) (memdb.ResultIterator, error) { 3953 txn := s.db.ReadTxn() 3954 3955 iter, err := txn.Get("allocs", "id_prefix", prefix) 3956 if err != nil { 3957 return nil, fmt.Errorf("alloc lookup failed: %v", err) 3958 } 3959 3960 ws.Add(iter.WatchCh()) 3961 3962 return iter, nil 3963 } 3964 3965 // AllocsByNode returns all the allocations by node 3966 func (s *StateStore) AllocsByNode(ws memdb.WatchSet, node string) ([]*structs.Allocation, error) { 3967 txn := s.db.ReadTxn() 3968 3969 return allocsByNodeTxn(txn, ws, node) 3970 } 3971 3972 func allocsByNodeTxn(txn ReadTxn, ws memdb.WatchSet, node string) ([]*structs.Allocation, error) { 3973 // Get an iterator over the node allocations, using only the 3974 // node prefix which ignores the terminal status 3975 iter, err := txn.Get("allocs", "node_prefix", node) 3976 if err != nil { 3977 return nil, err 3978 } 3979 3980 ws.Add(iter.WatchCh()) 3981 3982 var out []*structs.Allocation 3983 for { 3984 raw := iter.Next() 3985 if raw == nil { 3986 break 3987 } 3988 out = append(out, raw.(*structs.Allocation)) 3989 } 3990 return out, nil 3991 } 3992 3993 // AllocsByNodeTerminal returns all the allocations by node and terminal 3994 // status. 
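//
// Illustrative usage (not part of the original source), assuming a store and
// node ID from the caller: listing only the terminal allocations still
// recorded for a node:
//
//	ws := memdb.NewWatchSet()
//	terminalAllocs, err := store.AllocsByNodeTerminal(ws, nodeID, true)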
3995 func (s *StateStore) AllocsByNodeTerminal(ws memdb.WatchSet, node string, terminal bool) ([]*structs.Allocation, error) { 3996 txn := s.db.ReadTxn() 3997 3998 // Get an iterator over the node allocations 3999 iter, err := txn.Get("allocs", "node", node, terminal) 4000 if err != nil { 4001 return nil, err 4002 } 4003 4004 ws.Add(iter.WatchCh()) 4005 4006 var out []*structs.Allocation 4007 for { 4008 raw := iter.Next() 4009 if raw == nil { 4010 break 4011 } 4012 out = append(out, raw.(*structs.Allocation)) 4013 } 4014 return out, nil 4015 } 4016 4017 // AllocsByJob returns allocations by job id 4018 func (s *StateStore) AllocsByJob(ws memdb.WatchSet, namespace, jobID string, anyCreateIndex bool) ([]*structs.Allocation, error) { 4019 txn := s.db.ReadTxn() 4020 4021 // Get the job 4022 var job *structs.Job 4023 rawJob, err := txn.First("jobs", "id", namespace, jobID) 4024 if err != nil { 4025 return nil, err 4026 } 4027 if rawJob != nil { 4028 job = rawJob.(*structs.Job) 4029 } 4030 4031 // Get an iterator over the node allocations 4032 iter, err := txn.Get("allocs", "job", namespace, jobID) 4033 if err != nil { 4034 return nil, err 4035 } 4036 4037 ws.Add(iter.WatchCh()) 4038 4039 var out []*structs.Allocation 4040 for { 4041 raw := iter.Next() 4042 if raw == nil { 4043 break 4044 } 4045 4046 alloc := raw.(*structs.Allocation) 4047 // If the allocation belongs to a job with the same ID but a different 4048 // create index and we are not getting all the allocations whose Jobs 4049 // matches the same Job ID then we skip it 4050 if !anyCreateIndex && job != nil && alloc.Job.CreateIndex != job.CreateIndex { 4051 continue 4052 } 4053 out = append(out, raw.(*structs.Allocation)) 4054 } 4055 return out, nil 4056 } 4057 4058 // AllocsByEval returns all the allocations by eval id 4059 func (s *StateStore) AllocsByEval(ws memdb.WatchSet, evalID string) ([]*structs.Allocation, error) { 4060 txn := s.db.ReadTxn() 4061 4062 // Get an iterator over the eval allocations 4063 iter, err := txn.Get("allocs", "eval", evalID) 4064 if err != nil { 4065 return nil, err 4066 } 4067 4068 ws.Add(iter.WatchCh()) 4069 4070 var out []*structs.Allocation 4071 for { 4072 raw := iter.Next() 4073 if raw == nil { 4074 break 4075 } 4076 out = append(out, raw.(*structs.Allocation)) 4077 } 4078 return out, nil 4079 } 4080 4081 // AllocsByDeployment returns all the allocations by deployment id 4082 func (s *StateStore) AllocsByDeployment(ws memdb.WatchSet, deploymentID string) ([]*structs.Allocation, error) { 4083 txn := s.db.ReadTxn() 4084 4085 // Get an iterator over the deployments allocations 4086 iter, err := txn.Get("allocs", "deployment", deploymentID) 4087 if err != nil { 4088 return nil, err 4089 } 4090 4091 ws.Add(iter.WatchCh()) 4092 4093 var out []*structs.Allocation 4094 for { 4095 raw := iter.Next() 4096 if raw == nil { 4097 break 4098 } 4099 out = append(out, raw.(*structs.Allocation)) 4100 } 4101 return out, nil 4102 } 4103 4104 // Allocs returns an iterator over all the evaluations. 
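// The iterator walks the allocs table in CreateIndex order, ascending by
// default or descending when SortReverse is given.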
4105 func (s *StateStore) Allocs(ws memdb.WatchSet, sort SortOption) (memdb.ResultIterator, error) { 4106 txn := s.db.ReadTxn() 4107 4108 var it memdb.ResultIterator 4109 var err error 4110 4111 switch sort { 4112 case SortReverse: 4113 it, err = txn.GetReverse("allocs", "create") 4114 default: 4115 it, err = txn.Get("allocs", "create") 4116 } 4117 4118 if err != nil { 4119 return nil, err 4120 } 4121 4122 ws.Add(it.WatchCh()) 4123 4124 return it, nil 4125 } 4126 4127 func (s *StateStore) AllocsByNamespaceOrdered(ws memdb.WatchSet, namespace string, sort SortOption) (memdb.ResultIterator, error) { 4128 txn := s.db.ReadTxn() 4129 4130 var ( 4131 it memdb.ResultIterator 4132 err error 4133 exact = terminate(namespace) 4134 ) 4135 4136 switch sort { 4137 case SortReverse: 4138 it, err = txn.GetReverse("allocs", "namespace_create_prefix", exact) 4139 default: 4140 it, err = txn.Get("allocs", "namespace_create_prefix", exact) 4141 } 4142 4143 if err != nil { 4144 return nil, err 4145 } 4146 4147 ws.Add(it.WatchCh()) 4148 4149 return it, nil 4150 } 4151 4152 // AllocsByNamespace returns an iterator over all the allocations in the 4153 // namespace 4154 func (s *StateStore) AllocsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) { 4155 txn := s.db.ReadTxn() 4156 return s.allocsByNamespaceImpl(ws, txn, namespace) 4157 } 4158 4159 // allocsByNamespaceImpl returns an iterator over all the allocations in the 4160 // namespace 4161 func (s *StateStore) allocsByNamespaceImpl(ws memdb.WatchSet, txn *txn, namespace string) (memdb.ResultIterator, error) { 4162 // Walk the entire table 4163 iter, err := txn.Get("allocs", "namespace", namespace) 4164 if err != nil { 4165 return nil, err 4166 } 4167 4168 ws.Add(iter.WatchCh()) 4169 4170 return iter, nil 4171 } 4172 4173 // UpsertVaultAccessor is used to register a set of Vault Accessors. 
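// Each accessor is stamped with the current Raft index as its CreateIndex
// before insertion, and the vault_accessors entry in the index table is
// bumped within the same write transaction.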
4174 func (s *StateStore) UpsertVaultAccessor(index uint64, accessors []*structs.VaultAccessor) error { 4175 txn := s.db.WriteTxn(index) 4176 defer txn.Abort() 4177 4178 for _, accessor := range accessors { 4179 // Set the create index 4180 accessor.CreateIndex = index 4181 4182 // Insert the accessor 4183 if err := txn.Insert("vault_accessors", accessor); err != nil { 4184 return fmt.Errorf("accessor insert failed: %v", err) 4185 } 4186 } 4187 4188 if err := txn.Insert("index", &IndexEntry{"vault_accessors", index}); err != nil { 4189 return fmt.Errorf("index update failed: %v", err) 4190 } 4191 4192 return txn.Commit() 4193 } 4194 4195 // DeleteVaultAccessors is used to delete a set of Vault Accessors 4196 func (s *StateStore) DeleteVaultAccessors(index uint64, accessors []*structs.VaultAccessor) error { 4197 txn := s.db.WriteTxn(index) 4198 defer txn.Abort() 4199 4200 // Lookup the accessor 4201 for _, accessor := range accessors { 4202 // Delete the accessor 4203 if err := txn.Delete("vault_accessors", accessor); err != nil { 4204 return fmt.Errorf("accessor delete failed: %v", err) 4205 } 4206 } 4207 4208 if err := txn.Insert("index", &IndexEntry{"vault_accessors", index}); err != nil { 4209 return fmt.Errorf("index update failed: %v", err) 4210 } 4211 4212 return txn.Commit() 4213 } 4214 4215 // VaultAccessor returns the given Vault accessor 4216 func (s *StateStore) VaultAccessor(ws memdb.WatchSet, accessor string) (*structs.VaultAccessor, error) { 4217 txn := s.db.ReadTxn() 4218 4219 watchCh, existing, err := txn.FirstWatch("vault_accessors", "id", accessor) 4220 if err != nil { 4221 return nil, fmt.Errorf("accessor lookup failed: %v", err) 4222 } 4223 4224 ws.Add(watchCh) 4225 4226 if existing != nil { 4227 return existing.(*structs.VaultAccessor), nil 4228 } 4229 4230 return nil, nil 4231 } 4232 4233 // VaultAccessors returns an iterator of Vault accessors. 
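//
// Illustrative usage (not part of the original source), assuming a store,
// watch set, and result slice from the caller: draining the iterator into a
// slice of accessors:
//
//	iter, err := store.VaultAccessors(ws)
//	if err != nil { /* handle error */ }
//	for raw := iter.Next(); raw != nil; raw = iter.Next() {
//		accessors = append(accessors, raw.(*structs.VaultAccessor))
//	}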
4234 func (s *StateStore) VaultAccessors(ws memdb.WatchSet) (memdb.ResultIterator, error) { 4235 txn := s.db.ReadTxn() 4236 4237 iter, err := txn.Get("vault_accessors", "id") 4238 if err != nil { 4239 return nil, err 4240 } 4241 4242 ws.Add(iter.WatchCh()) 4243 4244 return iter, nil 4245 } 4246 4247 // VaultAccessorsByAlloc returns all the Vault accessors by alloc id 4248 func (s *StateStore) VaultAccessorsByAlloc(ws memdb.WatchSet, allocID string) ([]*structs.VaultAccessor, error) { 4249 txn := s.db.ReadTxn() 4250 4251 // Get an iterator over the accessors 4252 iter, err := txn.Get("vault_accessors", "alloc_id", allocID) 4253 if err != nil { 4254 return nil, err 4255 } 4256 4257 ws.Add(iter.WatchCh()) 4258 4259 var out []*structs.VaultAccessor 4260 for { 4261 raw := iter.Next() 4262 if raw == nil { 4263 break 4264 } 4265 out = append(out, raw.(*structs.VaultAccessor)) 4266 } 4267 return out, nil 4268 } 4269 4270 // VaultAccessorsByNode returns all the Vault accessors by node id 4271 func (s *StateStore) VaultAccessorsByNode(ws memdb.WatchSet, nodeID string) ([]*structs.VaultAccessor, error) { 4272 txn := s.db.ReadTxn() 4273 4274 // Get an iterator over the accessors 4275 iter, err := txn.Get("vault_accessors", "node_id", nodeID) 4276 if err != nil { 4277 return nil, err 4278 } 4279 4280 ws.Add(iter.WatchCh()) 4281 4282 var out []*structs.VaultAccessor 4283 for { 4284 raw := iter.Next() 4285 if raw == nil { 4286 break 4287 } 4288 out = append(out, raw.(*structs.VaultAccessor)) 4289 } 4290 return out, nil 4291 } 4292 4293 func indexEntry(table string, index uint64) *IndexEntry { 4294 return &IndexEntry{ 4295 Key: table, 4296 Value: index, 4297 } 4298 } 4299 4300 const siTokenAccessorTable = "si_token_accessors" 4301 4302 // UpsertSITokenAccessors is used to register a set of Service Identity token accessors. 4303 func (s *StateStore) UpsertSITokenAccessors(index uint64, accessors []*structs.SITokenAccessor) error { 4304 txn := s.db.WriteTxn(index) 4305 defer txn.Abort() 4306 4307 for _, accessor := range accessors { 4308 // set the create index 4309 accessor.CreateIndex = index 4310 4311 // insert the accessor 4312 if err := txn.Insert(siTokenAccessorTable, accessor); err != nil { 4313 return fmt.Errorf("accessor insert failed: %w", err) 4314 } 4315 } 4316 4317 // update the index for this table 4318 if err := txn.Insert("index", indexEntry(siTokenAccessorTable, index)); err != nil { 4319 return fmt.Errorf("index update failed: %w", err) 4320 } 4321 4322 return txn.Commit() 4323 } 4324 4325 // DeleteSITokenAccessors is used to delete a set of Service Identity token accessors. 4326 func (s *StateStore) DeleteSITokenAccessors(index uint64, accessors []*structs.SITokenAccessor) error { 4327 txn := s.db.WriteTxn(index) 4328 defer txn.Abort() 4329 4330 // Lookup each accessor 4331 for _, accessor := range accessors { 4332 // Delete the accessor 4333 if err := txn.Delete(siTokenAccessorTable, accessor); err != nil { 4334 return fmt.Errorf("accessor delete failed: %w", err) 4335 } 4336 } 4337 4338 // update the index for this table 4339 if err := txn.Insert("index", indexEntry(siTokenAccessorTable, index)); err != nil { 4340 return fmt.Errorf("index update failed: %w", err) 4341 } 4342 4343 return txn.Commit() 4344 } 4345 4346 // SITokenAccessor returns the given Service Identity token accessor. 
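// A nil result with a nil error means no accessor with that ID exists; the
// watch channel from the lookup is still added to the watch set so callers
// can be notified if one is later inserted.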
4347 func (s *StateStore) SITokenAccessor(ws memdb.WatchSet, accessorID string) (*structs.SITokenAccessor, error) { 4348 txn := s.db.ReadTxn() 4349 defer txn.Abort() 4350 4351 watchCh, existing, err := txn.FirstWatch(siTokenAccessorTable, "id", accessorID) 4352 if err != nil { 4353 return nil, fmt.Errorf("accessor lookup failed: %w", err) 4354 } 4355 4356 ws.Add(watchCh) 4357 4358 if existing != nil { 4359 return existing.(*structs.SITokenAccessor), nil 4360 } 4361 4362 return nil, nil 4363 } 4364 4365 // SITokenAccessors returns an iterator of Service Identity token accessors. 4366 func (s *StateStore) SITokenAccessors(ws memdb.WatchSet) (memdb.ResultIterator, error) { 4367 txn := s.db.ReadTxn() 4368 defer txn.Abort() 4369 4370 iter, err := txn.Get(siTokenAccessorTable, "id") 4371 if err != nil { 4372 return nil, err 4373 } 4374 4375 ws.Add(iter.WatchCh()) 4376 4377 return iter, nil 4378 } 4379 4380 // SITokenAccessorsByAlloc returns all the Service Identity token accessors by alloc ID. 4381 func (s *StateStore) SITokenAccessorsByAlloc(ws memdb.WatchSet, allocID string) ([]*structs.SITokenAccessor, error) { 4382 txn := s.db.ReadTxn() 4383 defer txn.Abort() 4384 4385 // Get an iterator over the accessors 4386 iter, err := txn.Get(siTokenAccessorTable, "alloc_id", allocID) 4387 if err != nil { 4388 return nil, err 4389 } 4390 4391 ws.Add(iter.WatchCh()) 4392 4393 var result []*structs.SITokenAccessor 4394 for raw := iter.Next(); raw != nil; raw = iter.Next() { 4395 result = append(result, raw.(*structs.SITokenAccessor)) 4396 } 4397 4398 return result, nil 4399 } 4400 4401 // SITokenAccessorsByNode returns all the Service Identity token accessors by node ID. 4402 func (s *StateStore) SITokenAccessorsByNode(ws memdb.WatchSet, nodeID string) ([]*structs.SITokenAccessor, error) { 4403 txn := s.db.ReadTxn() 4404 defer txn.Abort() 4405 4406 // Get an iterator over the accessors 4407 iter, err := txn.Get(siTokenAccessorTable, "node_id", nodeID) 4408 if err != nil { 4409 return nil, err 4410 } 4411 4412 ws.Add(iter.WatchCh()) 4413 4414 var result []*structs.SITokenAccessor 4415 for raw := iter.Next(); raw != nil; raw = iter.Next() { 4416 result = append(result, raw.(*structs.SITokenAccessor)) 4417 } 4418 4419 return result, nil 4420 } 4421 4422 // UpdateDeploymentStatus is used to make deployment status updates and 4423 // potentially make a evaluation 4424 func (s *StateStore) UpdateDeploymentStatus(msgType structs.MessageType, index uint64, req *structs.DeploymentStatusUpdateRequest) error { 4425 txn := s.db.WriteTxnMsgT(msgType, index) 4426 defer txn.Abort() 4427 4428 if err := s.updateDeploymentStatusImpl(index, req.DeploymentUpdate, txn); err != nil { 4429 return err 4430 } 4431 4432 // Upsert the job if necessary 4433 if req.Job != nil { 4434 if err := s.upsertJobImpl(index, req.Job, false, txn); err != nil { 4435 return err 4436 } 4437 } 4438 4439 // Upsert the optional eval 4440 if req.Eval != nil { 4441 if err := s.nestedUpsertEval(txn, index, req.Eval); err != nil { 4442 return err 4443 } 4444 } 4445 4446 return txn.Commit() 4447 } 4448 4449 // updateDeploymentStatusImpl is used to make deployment status updates 4450 func (s *StateStore) updateDeploymentStatusImpl(index uint64, u *structs.DeploymentStatusUpdate, txn *txn) error { 4451 // Retrieve deployment 4452 ws := memdb.NewWatchSet() 4453 deployment, err := s.deploymentByIDImpl(ws, u.DeploymentID, txn) 4454 if err != nil { 4455 return err 4456 } else if deployment == nil { 4457 return fmt.Errorf("Deployment ID %q couldn't be updated 
as it does not exist", u.DeploymentID) 4458 } else if !deployment.Active() { 4459 return fmt.Errorf("Deployment %q has terminal status %q:", deployment.ID, deployment.Status) 4460 } 4461 4462 // Apply the new status 4463 copy := deployment.Copy() 4464 copy.Status = u.Status 4465 copy.StatusDescription = u.StatusDescription 4466 copy.ModifyIndex = index 4467 4468 // Insert the deployment 4469 if err := txn.Insert("deployment", copy); err != nil { 4470 return err 4471 } 4472 4473 // Update the index 4474 if err := txn.Insert("index", &IndexEntry{"deployment", index}); err != nil { 4475 return fmt.Errorf("index update failed: %v", err) 4476 } 4477 4478 // If the deployment is being marked as complete, set the job to stable. 4479 if copy.Status == structs.DeploymentStatusSuccessful { 4480 if err := s.updateJobStabilityImpl(index, copy.Namespace, copy.JobID, copy.JobVersion, true, txn); err != nil { 4481 return fmt.Errorf("failed to update job stability: %v", err) 4482 } 4483 } 4484 4485 return nil 4486 } 4487 4488 // UpdateJobStability updates the stability of the given job and version to the 4489 // desired status. 4490 func (s *StateStore) UpdateJobStability(index uint64, namespace, jobID string, jobVersion uint64, stable bool) error { 4491 txn := s.db.WriteTxn(index) 4492 defer txn.Abort() 4493 4494 if err := s.updateJobStabilityImpl(index, namespace, jobID, jobVersion, stable, txn); err != nil { 4495 return err 4496 } 4497 4498 return txn.Commit() 4499 } 4500 4501 // updateJobStabilityImpl updates the stability of the given job and version 4502 func (s *StateStore) updateJobStabilityImpl(index uint64, namespace, jobID string, jobVersion uint64, stable bool, txn *txn) error { 4503 // Get the job that is referenced 4504 job, err := s.jobByIDAndVersionImpl(nil, namespace, jobID, jobVersion, txn) 4505 if err != nil { 4506 return err 4507 } 4508 4509 // Has already been cleared, nothing to do 4510 if job == nil { 4511 return nil 4512 } 4513 4514 // If the job already has the desired stability, nothing to do 4515 if job.Stable == stable { 4516 return nil 4517 } 4518 4519 copy := job.Copy() 4520 copy.Stable = stable 4521 return s.upsertJobImpl(index, copy, true, txn) 4522 } 4523 4524 // UpdateDeploymentPromotion is used to promote canaries in a deployment and 4525 // potentially make a evaluation 4526 func (s *StateStore) UpdateDeploymentPromotion(msgType structs.MessageType, index uint64, req *structs.ApplyDeploymentPromoteRequest) error { 4527 txn := s.db.WriteTxnMsgT(msgType, index) 4528 defer txn.Abort() 4529 4530 // Retrieve deployment and ensure it is not terminal and is active 4531 ws := memdb.NewWatchSet() 4532 deployment, err := s.deploymentByIDImpl(ws, req.DeploymentID, txn) 4533 if err != nil { 4534 return err 4535 } else if deployment == nil { 4536 return fmt.Errorf("Deployment ID %q couldn't be updated as it does not exist", req.DeploymentID) 4537 } else if !deployment.Active() { 4538 return fmt.Errorf("Deployment %q has terminal status %q:", deployment.ID, deployment.Status) 4539 } 4540 4541 // Retrieve effected allocations 4542 iter, err := txn.Get("allocs", "deployment", req.DeploymentID) 4543 if err != nil { 4544 return err 4545 } 4546 4547 // groupIndex is a map of groups being promoted 4548 groupIndex := make(map[string]struct{}, len(req.Groups)) 4549 for _, g := range req.Groups { 4550 groupIndex[g] = struct{}{} 4551 } 4552 4553 // canaryIndex is the set of placed canaries in the deployment 4554 canaryIndex := make(map[string]struct{}, len(deployment.TaskGroups)) 4555 for _, 
dstate := range deployment.TaskGroups { 4556 for _, c := range dstate.PlacedCanaries { 4557 canaryIndex[c] = struct{}{} 4558 } 4559 } 4560 4561 // healthyCounts is a mapping of group to the number of healthy canaries 4562 healthyCounts := make(map[string]int, len(deployment.TaskGroups)) 4563 4564 // promotable is the set of allocations that we can move from canary to 4565 // non-canary 4566 var promotable []*structs.Allocation 4567 4568 for { 4569 raw := iter.Next() 4570 if raw == nil { 4571 break 4572 } 4573 4574 alloc := raw.(*structs.Allocation) 4575 4576 // Check that the alloc is a canary 4577 if _, ok := canaryIndex[alloc.ID]; !ok { 4578 continue 4579 } 4580 4581 // Check that the canary is part of a group being promoted 4582 if _, ok := groupIndex[alloc.TaskGroup]; !req.All && !ok { 4583 continue 4584 } 4585 4586 // Ensure the canaries are healthy 4587 if alloc.TerminalStatus() || !alloc.DeploymentStatus.IsHealthy() { 4588 continue 4589 } 4590 4591 healthyCounts[alloc.TaskGroup]++ 4592 promotable = append(promotable, alloc) 4593 } 4594 4595 // Determine if we have enough healthy allocations 4596 var unhealthyErr multierror.Error 4597 for tg, dstate := range deployment.TaskGroups { 4598 if _, ok := groupIndex[tg]; !req.All && !ok { 4599 continue 4600 } 4601 4602 need := dstate.DesiredCanaries 4603 if need == 0 { 4604 continue 4605 } 4606 4607 if have := healthyCounts[tg]; have < need { 4608 multierror.Append(&unhealthyErr, fmt.Errorf("Task group %q has %d/%d healthy allocations", tg, have, need)) 4609 } 4610 } 4611 4612 if err := unhealthyErr.ErrorOrNil(); err != nil { 4613 return err 4614 } 4615 4616 // Update deployment 4617 copy := deployment.Copy() 4618 copy.ModifyIndex = index 4619 for tg, status := range copy.TaskGroups { 4620 _, ok := groupIndex[tg] 4621 if !req.All && !ok { 4622 continue 4623 } 4624 4625 // reset the progress deadline 4626 if status.ProgressDeadline > 0 && !status.RequireProgressBy.IsZero() { 4627 status.RequireProgressBy = time.Now().Add(status.ProgressDeadline) 4628 } 4629 status.Promoted = true 4630 } 4631 4632 // If the deployment no longer needs promotion, update its status 4633 if !copy.RequiresPromotion() && copy.Status == structs.DeploymentStatusRunning { 4634 copy.StatusDescription = structs.DeploymentStatusDescriptionRunning 4635 } 4636 4637 // Insert the deployment 4638 if err := s.upsertDeploymentImpl(index, copy, txn); err != nil { 4639 return err 4640 } 4641 4642 // Upsert the optional eval 4643 if req.Eval != nil { 4644 if err := s.nestedUpsertEval(txn, index, req.Eval); err != nil { 4645 return err 4646 } 4647 } 4648 4649 // For each promotable allocation remove the canary field 4650 for _, alloc := range promotable { 4651 promoted := alloc.Copy() 4652 promoted.DeploymentStatus.Canary = false 4653 promoted.DeploymentStatus.ModifyIndex = index 4654 promoted.ModifyIndex = index 4655 promoted.AllocModifyIndex = index 4656 4657 if err := txn.Insert("allocs", promoted); err != nil { 4658 return fmt.Errorf("alloc insert failed: %v", err) 4659 } 4660 } 4661 4662 // Update the alloc index 4663 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 4664 return fmt.Errorf("index update failed: %v", err) 4665 } 4666 4667 return txn.Commit() 4668 } 4669 4670 // UpdateDeploymentAllocHealth is used to update the health of allocations as 4671 // part of the deployment and potentially make a evaluation 4672 func (s *StateStore) UpdateDeploymentAllocHealth(msgType structs.MessageType, index uint64, req 
*structs.ApplyDeploymentAllocHealthRequest) error { 4673 txn := s.db.WriteTxnMsgT(msgType, index) 4674 defer txn.Abort() 4675 4676 // Retrieve deployment and ensure it is not terminal and is active 4677 ws := memdb.NewWatchSet() 4678 deployment, err := s.deploymentByIDImpl(ws, req.DeploymentID, txn) 4679 if err != nil { 4680 return err 4681 } else if deployment == nil { 4682 return fmt.Errorf("Deployment ID %q couldn't be updated as it does not exist", req.DeploymentID) 4683 } else if !deployment.Active() { 4684 return fmt.Errorf("Deployment %q has terminal status %q:", deployment.ID, deployment.Status) 4685 } 4686 4687 // Update the health status of each allocation 4688 if total := len(req.HealthyAllocationIDs) + len(req.UnhealthyAllocationIDs); total != 0 { 4689 setAllocHealth := func(id string, healthy bool, ts time.Time) error { 4690 existing, err := txn.First("allocs", "id", id) 4691 if err != nil { 4692 return fmt.Errorf("alloc %q lookup failed: %v", id, err) 4693 } 4694 if existing == nil { 4695 return fmt.Errorf("unknown alloc %q", id) 4696 } 4697 4698 old := existing.(*structs.Allocation) 4699 if old.DeploymentID != req.DeploymentID { 4700 return fmt.Errorf("alloc %q is not part of deployment %q", id, req.DeploymentID) 4701 } 4702 4703 // Set the health 4704 copy := old.Copy() 4705 if copy.DeploymentStatus == nil { 4706 copy.DeploymentStatus = &structs.AllocDeploymentStatus{} 4707 } 4708 copy.DeploymentStatus.Healthy = pointer.Of(healthy) 4709 copy.DeploymentStatus.Timestamp = ts 4710 copy.DeploymentStatus.ModifyIndex = index 4711 copy.ModifyIndex = index 4712 4713 if err := s.updateDeploymentWithAlloc(index, copy, old, txn); err != nil { 4714 return fmt.Errorf("error updating deployment: %v", err) 4715 } 4716 4717 if err := txn.Insert("allocs", copy); err != nil { 4718 return fmt.Errorf("alloc insert failed: %v", err) 4719 } 4720 4721 return nil 4722 } 4723 4724 for _, id := range req.HealthyAllocationIDs { 4725 if err := setAllocHealth(id, true, req.Timestamp); err != nil { 4726 return err 4727 } 4728 } 4729 for _, id := range req.UnhealthyAllocationIDs { 4730 if err := setAllocHealth(id, false, req.Timestamp); err != nil { 4731 return err 4732 } 4733 } 4734 4735 // Update the indexes 4736 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 4737 return fmt.Errorf("index update failed: %v", err) 4738 } 4739 } 4740 4741 // Update the deployment status as needed. 4742 if req.DeploymentUpdate != nil { 4743 if err := s.updateDeploymentStatusImpl(index, req.DeploymentUpdate, txn); err != nil { 4744 return err 4745 } 4746 } 4747 4748 // Upsert the job if necessary 4749 if req.Job != nil { 4750 if err := s.upsertJobImpl(index, req.Job, false, txn); err != nil { 4751 return err 4752 } 4753 } 4754 4755 // Upsert the optional eval 4756 if req.Eval != nil { 4757 if err := s.nestedUpsertEval(txn, index, req.Eval); err != nil { 4758 return err 4759 } 4760 } 4761 4762 return txn.Commit() 4763 } 4764 4765 // LatestIndex returns the greatest index value for all indexes. 
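// It scans every entry in the index table and returns the largest Value,
// i.e. the highest Raft index recorded for any table.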
4766 func (s *StateStore) LatestIndex() (uint64, error) { 4767 indexes, err := s.Indexes() 4768 if err != nil { 4769 return 0, err 4770 } 4771 4772 var max uint64 = 0 4773 for { 4774 raw := indexes.Next() 4775 if raw == nil { 4776 break 4777 } 4778 4779 // Prepare the request struct 4780 idx := raw.(*IndexEntry) 4781 4782 // Determine the max 4783 if idx.Value > max { 4784 max = idx.Value 4785 } 4786 } 4787 4788 return max, nil 4789 } 4790 4791 // Index finds the matching index value 4792 func (s *StateStore) Index(name string) (uint64, error) { 4793 txn := s.db.ReadTxn() 4794 4795 // Lookup the first matching index 4796 out, err := txn.First("index", "id", name) 4797 if err != nil { 4798 return 0, err 4799 } 4800 if out == nil { 4801 return 0, nil 4802 } 4803 return out.(*IndexEntry).Value, nil 4804 } 4805 4806 // Indexes returns an iterator over all the indexes 4807 func (s *StateStore) Indexes() (memdb.ResultIterator, error) { 4808 txn := s.db.ReadTxn() 4809 4810 // Walk the entire nodes table 4811 iter, err := txn.Get("index", "id") 4812 if err != nil { 4813 return nil, err 4814 } 4815 return iter, nil 4816 } 4817 4818 // ReconcileJobSummaries re-creates summaries for all jobs present in the state 4819 // store 4820 func (s *StateStore) ReconcileJobSummaries(index uint64) error { 4821 txn := s.db.WriteTxn(index) 4822 defer txn.Abort() 4823 4824 // Get all the jobs 4825 iter, err := txn.Get("jobs", "id") 4826 if err != nil { 4827 return err 4828 } 4829 // COMPAT: Remove after 0.11 4830 // Iterate over jobs to build a list of parent jobs and their children 4831 parentMap := make(map[string][]*structs.Job) 4832 for { 4833 rawJob := iter.Next() 4834 if rawJob == nil { 4835 break 4836 } 4837 job := rawJob.(*structs.Job) 4838 if job.ParentID != "" { 4839 children := parentMap[job.ParentID] 4840 children = append(children, job) 4841 parentMap[job.ParentID] = children 4842 } 4843 } 4844 4845 // Get all the jobs again 4846 iter, err = txn.Get("jobs", "id") 4847 if err != nil { 4848 return err 4849 } 4850 4851 for { 4852 rawJob := iter.Next() 4853 if rawJob == nil { 4854 break 4855 } 4856 job := rawJob.(*structs.Job) 4857 4858 if job.IsParameterized() || job.IsPeriodic() { 4859 // COMPAT: Remove after 0.11 4860 4861 // The following block of code fixes incorrect child summaries due to a bug 4862 // See https://github.com/hashicorp/nomad/issues/3886 for details 4863 rawSummary, err := txn.First("job_summary", "id", job.Namespace, job.ID) 4864 if err != nil { 4865 return err 4866 } 4867 if rawSummary == nil { 4868 continue 4869 } 4870 4871 oldSummary := rawSummary.(*structs.JobSummary) 4872 4873 // Create an empty summary 4874 summary := &structs.JobSummary{ 4875 JobID: job.ID, 4876 Namespace: job.Namespace, 4877 Summary: make(map[string]structs.TaskGroupSummary), 4878 Children: &structs.JobChildrenSummary{}, 4879 } 4880 4881 // Iterate over children of this job if any to fix summary counts 4882 children := parentMap[job.ID] 4883 for _, childJob := range children { 4884 switch childJob.Status { 4885 case structs.JobStatusPending: 4886 summary.Children.Pending++ 4887 case structs.JobStatusDead: 4888 summary.Children.Dead++ 4889 case structs.JobStatusRunning: 4890 summary.Children.Running++ 4891 } 4892 } 4893 4894 // Insert the job summary if its different 4895 if !reflect.DeepEqual(summary, oldSummary) { 4896 // Set the create index of the summary same as the job's create index 4897 // and the modify index to the current index 4898 summary.CreateIndex = job.CreateIndex 4899 summary.ModifyIndex = 
index 4900 4901 if err := txn.Insert("job_summary", summary); err != nil { 4902 return fmt.Errorf("error inserting job summary: %v", err) 4903 } 4904 } 4905 4906 // Done with handling a parent job, continue to next 4907 continue 4908 } 4909 4910 // Create a job summary for the job 4911 summary := &structs.JobSummary{ 4912 JobID: job.ID, 4913 Namespace: job.Namespace, 4914 Summary: make(map[string]structs.TaskGroupSummary), 4915 } 4916 for _, tg := range job.TaskGroups { 4917 summary.Summary[tg.Name] = structs.TaskGroupSummary{} 4918 } 4919 4920 // Find all the allocations for the jobs 4921 iterAllocs, err := txn.Get("allocs", "job", job.Namespace, job.ID) 4922 if err != nil { 4923 return err 4924 } 4925 4926 // Calculate the summary for the job 4927 for { 4928 rawAlloc := iterAllocs.Next() 4929 if rawAlloc == nil { 4930 break 4931 } 4932 alloc := rawAlloc.(*structs.Allocation) 4933 4934 // Ignore the allocation if it doesn't belong to the currently 4935 // registered job. The allocation is checked because of issue #2304 4936 if alloc.Job == nil || alloc.Job.CreateIndex != job.CreateIndex { 4937 continue 4938 } 4939 4940 tg := summary.Summary[alloc.TaskGroup] 4941 switch alloc.ClientStatus { 4942 case structs.AllocClientStatusFailed: 4943 tg.Failed += 1 4944 case structs.AllocClientStatusLost: 4945 tg.Lost += 1 4946 case structs.AllocClientStatusUnknown: 4947 tg.Unknown += 1 4948 case structs.AllocClientStatusComplete: 4949 tg.Complete += 1 4950 case structs.AllocClientStatusRunning: 4951 tg.Running += 1 4952 case structs.AllocClientStatusPending: 4953 tg.Starting += 1 4954 default: 4955 s.logger.Error("invalid client status set on allocation", "client_status", alloc.ClientStatus, "alloc_id", alloc.ID) 4956 } 4957 summary.Summary[alloc.TaskGroup] = tg 4958 } 4959 4960 // Set the create index of the summary same as the job's create index 4961 // and the modify index to the current index 4962 summary.CreateIndex = job.CreateIndex 4963 summary.ModifyIndex = index 4964 4965 // Insert the job summary 4966 if err := txn.Insert("job_summary", summary); err != nil { 4967 return fmt.Errorf("error inserting job summary: %v", err) 4968 } 4969 } 4970 4971 // Update the indexes table for job summary 4972 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 4973 return fmt.Errorf("index update failed: %v", err) 4974 } 4975 return txn.Commit() 4976 } 4977 4978 // setJobStatuses is a helper for calling setJobStatus on multiple jobs by ID. 4979 // It takes a map of job IDs to an optional forceStatus string. It returns an 4980 // error if the job doesn't exist or setJobStatus fails. 4981 func (s *StateStore) setJobStatuses(index uint64, txn *txn, 4982 jobs map[structs.NamespacedID]string, evalDelete bool) error { 4983 for tuple, forceStatus := range jobs { 4984 4985 existing, err := txn.First("jobs", "id", tuple.Namespace, tuple.ID) 4986 if err != nil { 4987 return fmt.Errorf("job lookup failed: %v", err) 4988 } 4989 4990 if existing == nil { 4991 continue 4992 } 4993 4994 if err := s.setJobStatus(index, txn, existing.(*structs.Job), evalDelete, forceStatus); err != nil { 4995 return err 4996 } 4997 4998 } 4999 5000 return nil 5001 } 5002 5003 // setJobStatus sets the status of the job by looking up associated evaluations 5004 // and allocations. evalDelete should be set to true if setJobStatus is being 5005 // called because an evaluation is being deleted (potentially because of garbage 5006 // collection). 
If forceStatus is non-empty, the job's status will be set to the 5007 // passed status. 5008 func (s *StateStore) setJobStatus(index uint64, txn *txn, 5009 job *structs.Job, evalDelete bool, forceStatus string) error { 5010 5011 // Capture the current status so we can check if there is a change 5012 oldStatus := job.Status 5013 newStatus := forceStatus 5014 5015 // If forceStatus is not set, compute the jobs status. 5016 if forceStatus == "" { 5017 var err error 5018 newStatus, err = s.getJobStatus(txn, job, evalDelete) 5019 if err != nil { 5020 return err 5021 } 5022 } 5023 5024 // Fast-path if the job has not changed. 5025 if oldStatus == newStatus { 5026 return nil 5027 } 5028 5029 // Copy and update the existing job 5030 updated := job.Copy() 5031 updated.Status = newStatus 5032 updated.ModifyIndex = index 5033 5034 // Insert the job 5035 if err := txn.Insert("jobs", updated); err != nil { 5036 return fmt.Errorf("job insert failed: %v", err) 5037 } 5038 if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil { 5039 return fmt.Errorf("index update failed: %v", err) 5040 } 5041 5042 // Update the children summary 5043 if err := s.setJobSummary(txn, updated, index, oldStatus, newStatus); err != nil { 5044 return fmt.Errorf("job summary update failed %w", err) 5045 } 5046 return nil 5047 } 5048 5049 func (s *StateStore) setJobSummary(txn *txn, updated *structs.Job, index uint64, oldStatus, newStatus string) error { 5050 if updated.ParentID == "" { 5051 return nil 5052 } 5053 5054 // Try to update the summary of the parent job summary 5055 summaryRaw, err := txn.First("job_summary", "id", updated.Namespace, updated.ParentID) 5056 if err != nil { 5057 return fmt.Errorf("unable to retrieve summary for parent job: %v", err) 5058 } 5059 5060 // Only continue if the summary exists. It could not exist if the parent 5061 // job was removed 5062 if summaryRaw != nil { 5063 existing := summaryRaw.(*structs.JobSummary) 5064 pSummary := existing.Copy() 5065 if pSummary.Children == nil { 5066 pSummary.Children = new(structs.JobChildrenSummary) 5067 } 5068 5069 // Determine the transition and update the correct fields 5070 children := pSummary.Children 5071 5072 // Decrement old status 5073 if oldStatus != "" { 5074 switch oldStatus { 5075 case structs.JobStatusPending: 5076 children.Pending-- 5077 case structs.JobStatusRunning: 5078 children.Running-- 5079 case structs.JobStatusDead: 5080 children.Dead-- 5081 default: 5082 return fmt.Errorf("unknown old job status %q", oldStatus) 5083 } 5084 } 5085 5086 // Increment new status 5087 switch newStatus { 5088 case structs.JobStatusPending: 5089 children.Pending++ 5090 case structs.JobStatusRunning: 5091 children.Running++ 5092 case structs.JobStatusDead: 5093 children.Dead++ 5094 default: 5095 return fmt.Errorf("unknown new job status %q", newStatus) 5096 } 5097 5098 // Update the index 5099 pSummary.ModifyIndex = index 5100 5101 // Insert the summary 5102 if err := txn.Insert("job_summary", pSummary); err != nil { 5103 return fmt.Errorf("job summary insert failed: %v", err) 5104 } 5105 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 5106 return fmt.Errorf("index update failed: %v", err) 5107 } 5108 } 5109 return nil 5110 } 5111 5112 func (s *StateStore) getJobStatus(txn *txn, job *structs.Job, evalDelete bool) (string, error) { 5113 // System, Periodic and Parameterized jobs are running until explicitly 5114 // stopped. 
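	// For other job types the status is derived below: any non-terminal
	// allocation means "running", otherwise any non-terminal evaluation means
	// "pending". If every eval and alloc is terminal (or the evals are being
	// garbage collected) the job is "dead"; a job with no evals or allocs at
	// all remains "pending".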
5115 if job.Type == structs.JobTypeSystem || 5116 job.IsParameterized() || 5117 job.IsPeriodic() { 5118 if job.Stop { 5119 return structs.JobStatusDead, nil 5120 } 5121 return structs.JobStatusRunning, nil 5122 } 5123 5124 allocs, err := txn.Get("allocs", "job", job.Namespace, job.ID) 5125 if err != nil { 5126 return "", err 5127 } 5128 5129 // If there is a non-terminal allocation, the job is running. 5130 hasAlloc := false 5131 for alloc := allocs.Next(); alloc != nil; alloc = allocs.Next() { 5132 hasAlloc = true 5133 if !alloc.(*structs.Allocation).TerminalStatus() { 5134 return structs.JobStatusRunning, nil 5135 } 5136 } 5137 5138 evals, err := txn.Get("evals", "job_prefix", job.Namespace, job.ID) 5139 if err != nil { 5140 return "", err 5141 } 5142 5143 hasEval := false 5144 for raw := evals.Next(); raw != nil; raw = evals.Next() { 5145 e := raw.(*structs.Evaluation) 5146 5147 // Filter non-exact matches 5148 if e.JobID != job.ID { 5149 continue 5150 } 5151 5152 hasEval = true 5153 if !e.TerminalStatus() { 5154 return structs.JobStatusPending, nil 5155 } 5156 } 5157 5158 // The job is dead if all the allocations and evals are terminal or if there 5159 // are no evals because of garbage collection. 5160 if evalDelete || hasEval || hasAlloc { 5161 return structs.JobStatusDead, nil 5162 } 5163 5164 return structs.JobStatusPending, nil 5165 } 5166 5167 // updateSummaryWithJob creates or updates job summaries when new jobs are 5168 // upserted or existing ones are updated 5169 func (s *StateStore) updateSummaryWithJob(index uint64, job *structs.Job, 5170 txn *txn) error { 5171 5172 // Update the job summary 5173 summaryRaw, err := txn.First("job_summary", "id", job.Namespace, job.ID) 5174 if err != nil { 5175 return fmt.Errorf("job summary lookup failed: %v", err) 5176 } 5177 5178 // Get the summary or create if necessary 5179 var summary *structs.JobSummary 5180 hasSummaryChanged := false 5181 if summaryRaw != nil { 5182 summary = summaryRaw.(*structs.JobSummary).Copy() 5183 } else { 5184 summary = &structs.JobSummary{ 5185 JobID: job.ID, 5186 Namespace: job.Namespace, 5187 Summary: make(map[string]structs.TaskGroupSummary), 5188 Children: new(structs.JobChildrenSummary), 5189 CreateIndex: index, 5190 } 5191 hasSummaryChanged = true 5192 } 5193 5194 for _, tg := range job.TaskGroups { 5195 if _, ok := summary.Summary[tg.Name]; !ok { 5196 newSummary := structs.TaskGroupSummary{ 5197 Complete: 0, 5198 Failed: 0, 5199 Running: 0, 5200 Starting: 0, 5201 } 5202 summary.Summary[tg.Name] = newSummary 5203 hasSummaryChanged = true 5204 } 5205 } 5206 5207 // The job summary has changed, so update the modify index. 
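	// Skipping the write when nothing changed keeps no-op job upserts from
	// churning the job_summary table and its index entry.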
5208 if hasSummaryChanged { 5209 summary.ModifyIndex = index 5210 5211 // Update the indexes table for job summary 5212 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 5213 return fmt.Errorf("index update failed: %v", err) 5214 } 5215 if err := txn.Insert("job_summary", summary); err != nil { 5216 return err 5217 } 5218 } 5219 5220 return nil 5221 } 5222 5223 // updateJobScalingPolicies upserts any scaling policies contained in the job and removes 5224 // any previous scaling policies that were removed from the job 5225 func (s *StateStore) updateJobScalingPolicies(index uint64, job *structs.Job, txn *txn) error { 5226 5227 ws := memdb.NewWatchSet() 5228 5229 scalingPolicies := job.GetScalingPolicies() 5230 newTargets := map[string]bool{} 5231 for _, p := range scalingPolicies { 5232 newTargets[p.JobKey()] = true 5233 } 5234 // find existing policies that need to be deleted 5235 deletedPolicies := []string{} 5236 iter, err := s.ScalingPoliciesByJobTxn(ws, job.Namespace, job.ID, txn) 5237 if err != nil { 5238 return fmt.Errorf("ScalingPoliciesByJob lookup failed: %v", err) 5239 } 5240 for raw := iter.Next(); raw != nil; raw = iter.Next() { 5241 oldPolicy := raw.(*structs.ScalingPolicy) 5242 if !newTargets[oldPolicy.JobKey()] { 5243 deletedPolicies = append(deletedPolicies, oldPolicy.ID) 5244 } 5245 } 5246 err = s.DeleteScalingPoliciesTxn(index, deletedPolicies, txn) 5247 if err != nil { 5248 return fmt.Errorf("DeleteScalingPolicies of removed policies failed: %v", err) 5249 } 5250 5251 err = s.UpsertScalingPoliciesTxn(index, scalingPolicies, txn) 5252 if err != nil { 5253 return fmt.Errorf("UpsertScalingPolicies of policies failed: %v", err) 5254 } 5255 5256 return nil 5257 } 5258 5259 // updateJobCSIPlugins runs on job update, and indexes the job in the plugin 5260 func (s *StateStore) updateJobCSIPlugins(index uint64, job, prev *structs.Job, txn *txn) error { 5261 plugIns := make(map[string]*structs.CSIPlugin) 5262 5263 upsertFn := func(job *structs.Job, delete bool) error { 5264 for _, tg := range job.TaskGroups { 5265 for _, t := range tg.Tasks { 5266 if t.CSIPluginConfig == nil { 5267 continue 5268 } 5269 5270 plugIn, ok := plugIns[t.CSIPluginConfig.ID] 5271 if !ok { 5272 p, err := s.CSIPluginByIDTxn(txn, nil, t.CSIPluginConfig.ID) 5273 if err != nil { 5274 return err 5275 } 5276 if p == nil { 5277 plugIn = structs.NewCSIPlugin(t.CSIPluginConfig.ID, index) 5278 } else { 5279 plugIn = p.Copy() 5280 plugIn.ModifyIndex = index 5281 } 5282 plugIns[plugIn.ID] = plugIn 5283 } 5284 5285 if delete { 5286 plugIn.DeleteJob(job, nil) 5287 } else { 5288 plugIn.AddJob(job, nil) 5289 } 5290 } 5291 } 5292 5293 return nil 5294 } 5295 5296 if prev != nil { 5297 err := upsertFn(prev, true) 5298 if err != nil { 5299 return err 5300 } 5301 } 5302 5303 err := upsertFn(job, false) 5304 if err != nil { 5305 return err 5306 } 5307 5308 for _, plugIn := range plugIns { 5309 err = txn.Insert("csi_plugins", plugIn) 5310 if err != nil { 5311 return fmt.Errorf("csi_plugins insert error: %v", err) 5312 } 5313 } 5314 5315 if err := txn.Insert("index", &IndexEntry{"csi_plugins", index}); err != nil { 5316 return fmt.Errorf("index update failed: %v", err) 5317 } 5318 5319 return nil 5320 } 5321 5322 // updateDeploymentWithAlloc is used to update the deployment state associated 5323 // with the given allocation. The passed alloc may be updated if the deployment 5324 // status has changed to capture the modify index at which it has changed. 
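//
// The change is expressed as three deltas computed by comparing the old and
// new allocation: a first placement in the deployment increments
// PlacedAllocs, newly reported health increments HealthyAllocs or
// UnhealthyAllocs, and a healthy-to-unhealthy transition shifts one
// allocation between those two counts.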
5325 func (s *StateStore) updateDeploymentWithAlloc(index uint64, alloc, existing *structs.Allocation, txn *txn) error { 5326 // Nothing to do if the allocation is not associated with a deployment 5327 if alloc.DeploymentID == "" { 5328 return nil 5329 } 5330 5331 // Get the deployment 5332 ws := memdb.NewWatchSet() 5333 deployment, err := s.deploymentByIDImpl(ws, alloc.DeploymentID, txn) 5334 if err != nil { 5335 return err 5336 } 5337 if deployment == nil { 5338 return nil 5339 } 5340 5341 // Retrieve the deployment state object 5342 _, ok := deployment.TaskGroups[alloc.TaskGroup] 5343 if !ok { 5344 // If the task group isn't part of the deployment, the task group wasn't 5345 // part of a rolling update so nothing to do 5346 return nil 5347 } 5348 5349 // Do not modify in-place. Instead keep track of what must be done 5350 placed := 0 5351 healthy := 0 5352 unhealthy := 0 5353 5354 // If there was no existing allocation, this is a placement and we increment 5355 // the placement 5356 existingHealthSet := existing != nil && existing.DeploymentStatus.HasHealth() 5357 allocHealthSet := alloc.DeploymentStatus.HasHealth() 5358 if existing == nil || existing.DeploymentID != alloc.DeploymentID { 5359 placed++ 5360 } else if !existingHealthSet && allocHealthSet { 5361 if *alloc.DeploymentStatus.Healthy { 5362 healthy++ 5363 } else { 5364 unhealthy++ 5365 } 5366 } else if existingHealthSet && allocHealthSet { 5367 // See if it has gone from healthy to unhealthy 5368 if *existing.DeploymentStatus.Healthy && !*alloc.DeploymentStatus.Healthy { 5369 healthy-- 5370 unhealthy++ 5371 } 5372 } 5373 5374 // Nothing to do 5375 if placed == 0 && healthy == 0 && unhealthy == 0 { 5376 return nil 5377 } 5378 5379 // Update the allocation's deployment status modify index 5380 if alloc.DeploymentStatus != nil && healthy+unhealthy != 0 { 5381 alloc.DeploymentStatus.ModifyIndex = index 5382 } 5383 5384 // Create a copy of the deployment object 5385 deploymentCopy := deployment.Copy() 5386 deploymentCopy.ModifyIndex = index 5387 5388 dstate := deploymentCopy.TaskGroups[alloc.TaskGroup] 5389 dstate.PlacedAllocs += placed 5390 dstate.HealthyAllocs += healthy 5391 dstate.UnhealthyAllocs += unhealthy 5392 5393 // Ensure PlacedCanaries accurately reflects the alloc canary status 5394 if alloc.DeploymentStatus != nil && alloc.DeploymentStatus.Canary { 5395 found := false 5396 for _, canary := range dstate.PlacedCanaries { 5397 if alloc.ID == canary { 5398 found = true 5399 break 5400 } 5401 } 5402 if !found { 5403 dstate.PlacedCanaries = append(dstate.PlacedCanaries, alloc.ID) 5404 } 5405 } 5406 5407 // Update the progress deadline 5408 if pd := dstate.ProgressDeadline; pd != 0 { 5409 // If we are the first placed allocation for the deployment start the progress deadline. 5410 if placed != 0 && dstate.RequireProgressBy.IsZero() { 5411 // Use modify time instead of create time because we may in-place 5412 // update the allocation to be part of a new deployment. 
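				// For example (illustrative numbers): with a 10 minute
				// ProgressDeadline and an allocation last modified at time T,
				// the task group must show progress by T+10m.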
5413 dstate.RequireProgressBy = time.Unix(0, alloc.ModifyTime).Add(pd) 5414 } else if healthy != 0 { 5415 if d := alloc.DeploymentStatus.Timestamp.Add(pd); d.After(dstate.RequireProgressBy) { 5416 dstate.RequireProgressBy = d 5417 } 5418 } 5419 } 5420 5421 // Upsert the deployment 5422 if err := s.upsertDeploymentImpl(index, deploymentCopy, txn); err != nil { 5423 return err 5424 } 5425 5426 return nil 5427 } 5428 5429 // updateSummaryWithAlloc updates the job summary when allocations are updated 5430 // or inserted 5431 func (s *StateStore) updateSummaryWithAlloc(index uint64, alloc *structs.Allocation, 5432 existingAlloc *structs.Allocation, txn *txn) error { 5433 5434 // We don't have to update the summary if the job is missing 5435 if alloc.Job == nil { 5436 return nil 5437 } 5438 5439 summaryRaw, err := txn.First("job_summary", "id", alloc.Namespace, alloc.JobID) 5440 if err != nil { 5441 return fmt.Errorf("unable to lookup job summary for job id %q in namespace %q: %v", alloc.JobID, alloc.Namespace, err) 5442 } 5443 5444 if summaryRaw == nil { 5445 // Check if the job is de-registered 5446 rawJob, err := txn.First("jobs", "id", alloc.Namespace, alloc.JobID) 5447 if err != nil { 5448 return fmt.Errorf("unable to query job: %v", err) 5449 } 5450 5451 // If the job is de-registered then we skip updating its summary 5452 if rawJob == nil { 5453 return nil 5454 } 5455 5456 return fmt.Errorf("job summary for job %q in namespace %q is not present", alloc.JobID, alloc.Namespace) 5457 } 5458 5459 // Get a copy of the existing summary 5460 jobSummary := summaryRaw.(*structs.JobSummary).Copy() 5461 5462 // Not updating the job summary because the allocation doesn't belong to the 5463 // currently registered job 5464 if jobSummary.CreateIndex != alloc.Job.CreateIndex { 5465 return nil 5466 } 5467 5468 tgSummary, ok := jobSummary.Summary[alloc.TaskGroup] 5469 if !ok { 5470 return fmt.Errorf("unable to find task group in the job summary: %v", alloc.TaskGroup) 5471 } 5472 5473 summaryChanged := false 5474 if existingAlloc == nil { 5475 switch alloc.DesiredStatus { 5476 case structs.AllocDesiredStatusStop, structs.AllocDesiredStatusEvict: 5477 s.logger.Error("new allocation inserted into state store with bad desired status", 5478 "alloc_id", alloc.ID, "desired_status", alloc.DesiredStatus) 5479 } 5480 switch alloc.ClientStatus { 5481 case structs.AllocClientStatusPending: 5482 tgSummary.Starting += 1 5483 if tgSummary.Queued > 0 { 5484 tgSummary.Queued -= 1 5485 } 5486 summaryChanged = true 5487 case structs.AllocClientStatusRunning, structs.AllocClientStatusFailed, 5488 structs.AllocClientStatusComplete: 5489 s.logger.Error("new allocation inserted into state store with bad client status", 5490 "alloc_id", alloc.ID, "client_status", alloc.ClientStatus) 5491 } 5492 } else if existingAlloc.ClientStatus != alloc.ClientStatus { 5493 // Incrementing the count of the bin of the current state 5494 switch alloc.ClientStatus { 5495 case structs.AllocClientStatusRunning: 5496 tgSummary.Running += 1 5497 case structs.AllocClientStatusFailed: 5498 tgSummary.Failed += 1 5499 case structs.AllocClientStatusPending: 5500 tgSummary.Starting += 1 5501 case structs.AllocClientStatusComplete: 5502 tgSummary.Complete += 1 5503 case structs.AllocClientStatusLost: 5504 tgSummary.Lost += 1 5505 case structs.AllocClientStatusUnknown: 5506 tgSummary.Unknown += 1 5507 } 5508 5509 // Decrementing the count of the bin of the last state 5510 switch existingAlloc.ClientStatus { 5511 case structs.AllocClientStatusRunning: 5512
if tgSummary.Running > 0 { 5513 tgSummary.Running -= 1 5514 } 5515 case structs.AllocClientStatusPending: 5516 if tgSummary.Starting > 0 { 5517 tgSummary.Starting -= 1 5518 } 5519 case structs.AllocClientStatusLost: 5520 if tgSummary.Lost > 0 { 5521 tgSummary.Lost -= 1 5522 } 5523 case structs.AllocClientStatusUnknown: 5524 if tgSummary.Unknown > 0 { 5525 tgSummary.Unknown -= 1 5526 } 5527 case structs.AllocClientStatusFailed, structs.AllocClientStatusComplete: 5528 default: 5529 s.logger.Error("invalid old client status for allocation", 5530 "alloc_id", existingAlloc.ID, "client_status", existingAlloc.ClientStatus) 5531 } 5532 summaryChanged = true 5533 } 5534 jobSummary.Summary[alloc.TaskGroup] = tgSummary 5535 5536 if summaryChanged { 5537 jobSummary.ModifyIndex = index 5538 5539 s.updatePluginWithJobSummary(index, jobSummary, alloc, txn) 5540 5541 // Update the indexes table for job summary 5542 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 5543 return fmt.Errorf("index update failed: %v", err) 5544 } 5545 5546 if err := txn.Insert("job_summary", jobSummary); err != nil { 5547 return fmt.Errorf("updating job summary failed: %v", err) 5548 } 5549 } 5550 5551 return nil 5552 } 5553 5554 // updatePluginForTerminalAlloc updates the CSI plugins for an alloc when the 5555 // allocation is updated or inserted with a terminal server status. 5556 func (s *StateStore) updatePluginForTerminalAlloc(index uint64, alloc *structs.Allocation, 5557 txn *txn) error { 5558 5559 if !alloc.ServerTerminalStatus() { 5560 return nil 5561 } 5562 5563 tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup) 5564 for _, t := range tg.Tasks { 5565 if t.CSIPluginConfig != nil { 5566 pluginID := t.CSIPluginConfig.ID 5567 plug, err := s.CSIPluginByIDTxn(txn, nil, pluginID) 5568 if err != nil { 5569 return err 5570 } 5571 if plug == nil { 5572 // plugin may not have been created because it never 5573 // became healthy, just move on 5574 return nil 5575 } 5576 plug = plug.Copy() 5577 err = plug.DeleteAlloc(alloc.ID, alloc.NodeID) 5578 if err != nil { 5579 return err 5580 } 5581 err = updateOrGCPlugin(index, txn, plug) 5582 if err != nil { 5583 return err 5584 } 5585 } 5586 } 5587 5588 return nil 5589 } 5590 5591 // updatePluginWithJobSummary updates the CSI plugins for a job when the 5592 // job summary is updated by an alloc 5593 func (s *StateStore) updatePluginWithJobSummary(index uint64, summary *structs.JobSummary, alloc *structs.Allocation, 5594 txn *txn) error { 5595 5596 tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup) 5597 if tg == nil { 5598 return nil 5599 } 5600 5601 for _, t := range tg.Tasks { 5602 if t.CSIPluginConfig != nil { 5603 pluginID := t.CSIPluginConfig.ID 5604 plug, err := s.CSIPluginByIDTxn(txn, nil, pluginID) 5605 if err != nil { 5606 return err 5607 } 5608 if plug == nil { 5609 plug = structs.NewCSIPlugin(pluginID, index) 5610 } else { 5611 plug = plug.Copy() 5612 } 5613 5614 plug.UpdateExpectedWithJob(alloc.Job, summary, 5615 alloc.Job.Status == structs.JobStatusDead) 5616 5617 err = updateOrGCPlugin(index, txn, plug) 5618 if err != nil { 5619 return err 5620 } 5621 } 5622 } 5623 5624 return nil 5625 } 5626 5627 // UpsertACLPolicies is used to create or update a set of ACL policies 5628 func (s *StateStore) UpsertACLPolicies(msgType structs.MessageType, index uint64, policies []*structs.ACLPolicy) error { 5629 txn := s.db.WriteTxnMsgT(msgType, index) 5630 defer txn.Abort() 5631 5632 for _, policy := range policies { 5633 // Ensure the policy hash is non-nil. 
This should be done outside the state store 5634 // for performance reasons, but we check here for defense in depth. 5635 if len(policy.Hash) == 0 { 5636 policy.SetHash() 5637 } 5638 5639 // Check if the policy already exists 5640 existing, err := txn.First("acl_policy", "id", policy.Name) 5641 if err != nil { 5642 return fmt.Errorf("policy lookup failed: %v", err) 5643 } 5644 5645 // Update all the indexes 5646 if existing != nil { 5647 policy.CreateIndex = existing.(*structs.ACLPolicy).CreateIndex 5648 policy.ModifyIndex = index 5649 } else { 5650 policy.CreateIndex = index 5651 policy.ModifyIndex = index 5652 } 5653 5654 // Update the policy 5655 if err := txn.Insert("acl_policy", policy); err != nil { 5656 return fmt.Errorf("upserting policy failed: %v", err) 5657 } 5658 } 5659 5660 // Update the indexes table 5661 if err := txn.Insert("index", &IndexEntry{"acl_policy", index}); err != nil { 5662 return fmt.Errorf("index update failed: %v", err) 5663 } 5664 5665 return txn.Commit() 5666 } 5667 5668 // DeleteACLPolicies deletes the policies with the given names 5669 func (s *StateStore) DeleteACLPolicies(msgType structs.MessageType, index uint64, names []string) error { 5670 txn := s.db.WriteTxnMsgT(msgType, index) 5671 defer txn.Abort() 5672 5673 // Delete the policy 5674 for _, name := range names { 5675 if _, err := txn.DeleteAll("acl_policy", "id", name); err != nil { 5676 return fmt.Errorf("deleting acl policy failed: %v", err) 5677 } 5678 } 5679 if err := txn.Insert("index", &IndexEntry{"acl_policy", index}); err != nil { 5680 return fmt.Errorf("index update failed: %v", err) 5681 } 5682 return txn.Commit() 5683 } 5684 5685 // ACLPolicyByName is used to lookup a policy by name 5686 func (s *StateStore) ACLPolicyByName(ws memdb.WatchSet, name string) (*structs.ACLPolicy, error) { 5687 txn := s.db.ReadTxn() 5688 5689 watchCh, existing, err := txn.FirstWatch("acl_policy", "id", name) 5690 if err != nil { 5691 return nil, fmt.Errorf("acl policy lookup failed: %v", err) 5692 } 5693 ws.Add(watchCh) 5694 5695 if existing != nil { 5696 return existing.(*structs.ACLPolicy), nil 5697 } 5698 return nil, nil 5699 } 5700 5701 // ACLPolicyByNamePrefix is used to lookup policies by prefix 5702 func (s *StateStore) ACLPolicyByNamePrefix(ws memdb.WatchSet, prefix string) (memdb.ResultIterator, error) { 5703 txn := s.db.ReadTxn() 5704 5705 iter, err := txn.Get("acl_policy", "id_prefix", prefix) 5706 if err != nil { 5707 return nil, fmt.Errorf("acl policy lookup failed: %v", err) 5708 } 5709 ws.Add(iter.WatchCh()) 5710 5711 return iter, nil 5712 } 5713 5714 // ACLPolicyByJob is used to lookup policies that have been attached to a 5715 // specific job 5716 func (s *StateStore) ACLPolicyByJob(ws memdb.WatchSet, ns, jobID string) (memdb.ResultIterator, error) { 5717 txn := s.db.ReadTxn() 5718 5719 iter, err := txn.Get("acl_policy", "job_prefix", ns, jobID) 5720 if err != nil { 5721 return nil, fmt.Errorf("acl policy lookup failed: %v", err) 5722 } 5723 ws.Add(iter.WatchCh()) 5724 5725 return iter, nil 5726 } 5727 5728 // ACLPolicies returns an iterator over all the acl policies 5729 func (s *StateStore) ACLPolicies(ws memdb.WatchSet) (memdb.ResultIterator, error) { 5730 txn := s.db.ReadTxn() 5731 5732 // Walk the entire table 5733 iter, err := txn.Get("acl_policy", "id") 5734 if err != nil { 5735 return nil, err 5736 } 5737 ws.Add(iter.WatchCh()) 5738 return iter, nil 5739 } 5740 5741 // UpsertACLTokens is used to create or update a set of ACL tokens 5742 func (s *StateStore) UpsertACLTokens(msgType
structs.MessageType, index uint64, tokens []*structs.ACLToken) error { 5743 txn := s.db.WriteTxnMsgT(msgType, index) 5744 defer txn.Abort() 5745 5746 for _, token := range tokens { 5747 // Ensure the policy hash is non-nil. This should be done outside the state store 5748 // for performance reasons, but we check here for defense in depth. 5749 if len(token.Hash) == 0 { 5750 token.SetHash() 5751 } 5752 5753 // Check if the token already exists 5754 existing, err := txn.First("acl_token", "id", token.AccessorID) 5755 if err != nil { 5756 return fmt.Errorf("token lookup failed: %v", err) 5757 } 5758 5759 // Update all the indexes 5760 if existing != nil { 5761 existTK := existing.(*structs.ACLToken) 5762 token.CreateIndex = existTK.CreateIndex 5763 token.ModifyIndex = index 5764 5765 // Do not allow SecretID or create time to change 5766 token.SecretID = existTK.SecretID 5767 token.CreateTime = existTK.CreateTime 5768 5769 } else { 5770 token.CreateIndex = index 5771 token.ModifyIndex = index 5772 } 5773 5774 // Update the token 5775 if err := txn.Insert("acl_token", token); err != nil { 5776 return fmt.Errorf("upserting token failed: %v", err) 5777 } 5778 } 5779 5780 // Update the indexes table 5781 if err := txn.Insert("index", &IndexEntry{"acl_token", index}); err != nil { 5782 return fmt.Errorf("index update failed: %v", err) 5783 } 5784 return txn.Commit() 5785 } 5786 5787 // DeleteACLTokens deletes the tokens with the given accessor ids 5788 func (s *StateStore) DeleteACLTokens(msgType structs.MessageType, index uint64, ids []string) error { 5789 txn := s.db.WriteTxnMsgT(msgType, index) 5790 defer txn.Abort() 5791 5792 // Delete the tokens 5793 for _, id := range ids { 5794 if _, err := txn.DeleteAll("acl_token", "id", id); err != nil { 5795 return fmt.Errorf("deleting acl token failed: %v", err) 5796 } 5797 } 5798 if err := txn.Insert("index", &IndexEntry{"acl_token", index}); err != nil { 5799 return fmt.Errorf("index update failed: %v", err) 5800 } 5801 return txn.Commit() 5802 } 5803 5804 // ACLTokenByAccessorID is used to lookup a token by accessor ID 5805 func (s *StateStore) ACLTokenByAccessorID(ws memdb.WatchSet, id string) (*structs.ACLToken, error) { 5806 if id == "" { 5807 return nil, fmt.Errorf("acl token lookup failed: missing accessor id") 5808 } 5809 5810 txn := s.db.ReadTxn() 5811 5812 watchCh, existing, err := txn.FirstWatch("acl_token", "id", id) 5813 if err != nil { 5814 return nil, fmt.Errorf("acl token lookup failed: %v", err) 5815 } 5816 ws.Add(watchCh) 5817 5818 // If the existing token is nil, this indicates it does not exist in state. 5819 if existing == nil { 5820 return nil, nil 5821 } 5822 5823 // Assert the token type which allows us to perform additional work on the 5824 // token that is needed before returning the call. 5825 token := existing.(*structs.ACLToken) 5826 5827 // Handle potential staleness of ACL role links. 
5828 if token, err = s.fixTokenRoleLinks(txn, token); err != nil { 5829 return nil, err 5830 } 5831 return token, nil 5832 } 5833 5834 // ACLTokenBySecretID is used to lookup a token by secret ID 5835 func (s *StateStore) ACLTokenBySecretID(ws memdb.WatchSet, secretID string) (*structs.ACLToken, error) { 5836 if secretID == "" { 5837 return nil, fmt.Errorf("acl token lookup failed: missing secret id") 5838 } 5839 5840 txn := s.db.ReadTxn() 5841 5842 watchCh, existing, err := txn.FirstWatch("acl_token", "secret", secretID) 5843 if err != nil { 5844 return nil, fmt.Errorf("acl token lookup failed: %v", err) 5845 } 5846 ws.Add(watchCh) 5847 5848 // If the existing token is nil, this indicates it does not exist in state. 5849 if existing == nil { 5850 return nil, nil 5851 } 5852 5853 // Assert the token type which allows us to perform additional work on the 5854 // token that is needed before returning the call. 5855 token := existing.(*structs.ACLToken) 5856 5857 // Handle potential staleness of ACL role links. 5858 if token, err = s.fixTokenRoleLinks(txn, token); err != nil { 5859 return nil, err 5860 } 5861 return token, nil 5862 } 5863 5864 // ACLTokenByAccessorIDPrefix is used to lookup tokens by prefix 5865 func (s *StateStore) ACLTokenByAccessorIDPrefix(ws memdb.WatchSet, prefix string, sort SortOption) (memdb.ResultIterator, error) { 5866 txn := s.db.ReadTxn() 5867 5868 var iter memdb.ResultIterator 5869 var err error 5870 5871 switch sort { 5872 case SortReverse: 5873 iter, err = txn.GetReverse("acl_token", "id_prefix", prefix) 5874 default: 5875 iter, err = txn.Get("acl_token", "id_prefix", prefix) 5876 } 5877 if err != nil { 5878 return nil, fmt.Errorf("acl token lookup failed: %v", err) 5879 } 5880 5881 ws.Add(iter.WatchCh()) 5882 return iter, nil 5883 } 5884 5885 // ACLTokens returns an iterator over all the tokens 5886 func (s *StateStore) ACLTokens(ws memdb.WatchSet, sort SortOption) (memdb.ResultIterator, error) { 5887 txn := s.db.ReadTxn() 5888 5889 var iter memdb.ResultIterator 5890 var err error 5891 5892 switch sort { 5893 case SortReverse: 5894 iter, err = txn.GetReverse("acl_token", "create") 5895 default: 5896 iter, err = txn.Get("acl_token", "create") 5897 } 5898 if err != nil { 5899 return nil, err 5900 } 5901 5902 ws.Add(iter.WatchCh()) 5903 return iter, nil 5904 } 5905 5906 // ACLTokensByGlobal returns an iterator over all the tokens filtered by global value 5907 func (s *StateStore) ACLTokensByGlobal(ws memdb.WatchSet, globalVal bool, sort SortOption) (memdb.ResultIterator, error) { 5908 txn := s.db.ReadTxn() 5909 5910 var iter memdb.ResultIterator 5911 var err error 5912 5913 // Walk the entire table 5914 switch sort { 5915 case SortReverse: 5916 iter, err = txn.GetReverse("acl_token", "global", globalVal) 5917 default: 5918 iter, err = txn.Get("acl_token", "global", globalVal) 5919 } 5920 if err != nil { 5921 return nil, err 5922 } 5923 5924 ws.Add(iter.WatchCh()) 5925 return iter, nil 5926 } 5927 5928 // CanBootstrapACLToken checks if bootstrapping is possible and returns the reset index 5929 func (s *StateStore) CanBootstrapACLToken() (bool, uint64, error) { 5930 txn := s.db.ReadTxn() 5931 5932 // Lookup the bootstrap sentinel 5933 out, err := txn.First("index", "id", "acl_token_bootstrap") 5934 if err != nil { 5935 return false, 0, err 5936 } 5937 5938 // No entry, we haven't bootstrapped yet 5939 if out == nil { 5940 return true, 0, nil 5941 } 5942 5943 // Return the reset index if we've already bootstrapped 5944 return false, out.(*IndexEntry).Value, nil 5945 
} 5946 5947 // BootstrapACLTokens is used to create an initial ACL token. 5948 func (s *StateStore) BootstrapACLTokens(msgType structs.MessageType, index uint64, resetIndex uint64, token *structs.ACLToken) error { 5949 txn := s.db.WriteTxnMsgT(msgType, index) 5950 defer txn.Abort() 5951 5952 // Check if we have already done a bootstrap 5953 existing, err := txn.First("index", "id", "acl_token_bootstrap") 5954 if err != nil { 5955 return fmt.Errorf("bootstrap check failed: %v", err) 5956 } 5957 if existing != nil { 5958 if resetIndex == 0 { 5959 return fmt.Errorf("ACL bootstrap already done") 5960 } else if resetIndex != existing.(*IndexEntry).Value { 5961 return fmt.Errorf("Invalid reset index for ACL bootstrap") 5962 } 5963 } 5964 5965 // Update the Create/Modify indexes 5966 token.CreateIndex = index 5967 token.ModifyIndex = index 5968 5969 // Insert the token 5970 if err := txn.Insert("acl_token", token); err != nil { 5971 return fmt.Errorf("upserting token failed: %v", err) 5972 } 5973 5974 // Update the indexes table, prevents future bootstrap until reset 5975 if err := txn.Insert("index", &IndexEntry{"acl_token", index}); err != nil { 5976 return fmt.Errorf("index update failed: %v", err) 5977 } 5978 if err := txn.Insert("index", &IndexEntry{"acl_token_bootstrap", index}); err != nil { 5979 return fmt.Errorf("index update failed: %v", err) 5980 } 5981 return txn.Commit() 5982 } 5983 5984 // UpsertOneTimeToken is used to create or update a one-time 5985 // token. Validating that we're not upserting an already-expired token is 5986 // made the responsibility of the caller to facilitate testing. 5987 func (s *StateStore) UpsertOneTimeToken(msgType structs.MessageType, index uint64, token *structs.OneTimeToken) error { 5988 txn := s.db.WriteTxnMsgT(msgType, index) 5989 defer txn.Abort() 5990 5991 // we expect the RPC call to set the ExpiresAt 5992 if token.ExpiresAt.IsZero() { 5993 return fmt.Errorf("one-time token must have an ExpiresAt time") 5994 } 5995 5996 // Update all the indexes 5997 token.CreateIndex = index 5998 token.ModifyIndex = index 5999 6000 // Create the token 6001 if err := txn.Insert("one_time_token", token); err != nil { 6002 return fmt.Errorf("upserting one-time token failed: %v", err) 6003 } 6004 6005 // Update the indexes table 6006 if err := txn.Insert("index", &IndexEntry{"one_time_token", index}); err != nil { 6007 return fmt.Errorf("index update failed: %v", err) 6008 } 6009 return txn.Commit() 6010 } 6011 6012 // DeleteOneTimeTokens deletes the tokens with the given ACLToken Accessor IDs 6013 func (s *StateStore) DeleteOneTimeTokens(msgType structs.MessageType, index uint64, ids []string) error { 6014 txn := s.db.WriteTxnMsgT(msgType, index) 6015 defer txn.Abort() 6016 6017 var deleted int 6018 for _, id := range ids { 6019 d, err := txn.DeleteAll("one_time_token", "id", id) 6020 if err != nil { 6021 return fmt.Errorf("deleting one-time token failed: %v", err) 6022 } 6023 deleted += d 6024 } 6025 6026 if deleted > 0 { 6027 if err := txn.Insert("index", &IndexEntry{"one_time_token", index}); err != nil { 6028 return fmt.Errorf("index update failed: %v", err) 6029 } 6030 } 6031 return txn.Commit() 6032 } 6033 6034 // ExpireOneTimeTokens deletes tokens that have expired 6035 func (s *StateStore) ExpireOneTimeTokens(msgType structs.MessageType, index uint64, timestamp time.Time) error { 6036 txn := s.db.WriteTxnMsgT(msgType, index) 6037 defer txn.Abort() 6038 6039 iter, err := s.oneTimeTokensExpiredTxn(txn, nil, timestamp) 6040 if err != nil { 6041 return err 6042
} 6043 6044 var deleted int 6045 for { 6046 raw := iter.Next() 6047 if raw == nil { 6048 break 6049 } 6050 ott, ok := raw.(*structs.OneTimeToken) 6051 if !ok || ott == nil { 6052 return fmt.Errorf("could not decode one-time token") 6053 } 6054 d, err := txn.DeleteAll("one_time_token", "secret", ott.OneTimeSecretID) 6055 if err != nil { 6056 return fmt.Errorf("deleting one-time token failed: %v", err) 6057 } 6058 deleted += d 6059 } 6060 6061 if deleted > 0 { 6062 if err := txn.Insert("index", &IndexEntry{"one_time_token", index}); err != nil { 6063 return fmt.Errorf("index update failed: %v", err) 6064 } 6065 } 6066 return txn.Commit() 6067 } 6068 6069 // oneTimeTokensExpiredTxn returns an iterator over all expired one-time tokens 6070 func (s *StateStore) oneTimeTokensExpiredTxn(txn *txn, ws memdb.WatchSet, timestamp time.Time) (memdb.ResultIterator, error) { 6071 iter, err := txn.Get("one_time_token", "id") 6072 if err != nil { 6073 return nil, fmt.Errorf("one-time token lookup failed: %v", err) 6074 } 6075 6076 ws.Add(iter.WatchCh()) 6077 iter = memdb.NewFilterIterator(iter, expiredOneTimeTokenFilter(timestamp)) 6078 return iter, nil 6079 } 6080 6081 // OneTimeTokenBySecret is used to lookup a token by secret 6082 func (s *StateStore) OneTimeTokenBySecret(ws memdb.WatchSet, secret string) (*structs.OneTimeToken, error) { 6083 if secret == "" { 6084 return nil, fmt.Errorf("one-time token lookup failed: missing secret") 6085 } 6086 6087 txn := s.db.ReadTxn() 6088 6089 watchCh, existing, err := txn.FirstWatch("one_time_token", "secret", secret) 6090 if err != nil { 6091 return nil, fmt.Errorf("one-time token lookup failed: %v", err) 6092 } 6093 ws.Add(watchCh) 6094 6095 if existing != nil { 6096 return existing.(*structs.OneTimeToken), nil 6097 } 6098 return nil, nil 6099 } 6100 6101 // expiredOneTimeTokenFilter returns a filter function that returns only 6102 // expired one-time tokens 6103 func expiredOneTimeTokenFilter(now time.Time) func(interface{}) bool { 6104 return func(raw interface{}) bool { 6105 ott, ok := raw.(*structs.OneTimeToken) 6106 if !ok { 6107 return true 6108 } 6109 6110 return ott.ExpiresAt.After(now) 6111 } 6112 } 6113 6114 // SchedulerConfig is used to get the current Scheduler configuration. 6115 func (s *StateStore) SchedulerConfig() (uint64, *structs.SchedulerConfiguration, error) { 6116 tx := s.db.ReadTxn() 6117 defer tx.Abort() 6118 return s.schedulerConfigTxn(tx) 6119 } 6120 6121 func (s *StateStore) schedulerConfigTxn(txn *txn) (uint64, *structs.SchedulerConfiguration, error) { 6122 6123 // Get the scheduler config 6124 c, err := txn.First("scheduler_config", "id") 6125 if err != nil { 6126 return 0, nil, fmt.Errorf("failed scheduler config lookup: %s", err) 6127 } 6128 6129 config, ok := c.(*structs.SchedulerConfiguration) 6130 if !ok { 6131 return 0, nil, nil 6132 } 6133 6134 return config.ModifyIndex, config, nil 6135 } 6136 6137 // SchedulerSetConfig is used to set the current Scheduler configuration. 
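//
// Illustrative call (a sketch; raftIndex is assumed to be the index of the
// Raft entry being applied, and the field shown is only an example of what a
// caller might set):
//
//	cfg := &structs.SchedulerConfiguration{MemoryOversubscriptionEnabled: true}
//	if err := s.SchedulerSetConfig(raftIndex, cfg); err != nil {
//		return err
//	}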
6138 func (s *StateStore) SchedulerSetConfig(index uint64, config *structs.SchedulerConfiguration) error { 6139 tx := s.db.WriteTxn(index) 6140 defer tx.Abort() 6141 6142 s.schedulerSetConfigTxn(index, tx, config) 6143 6144 return tx.Commit() 6145 } 6146 6147 func (s *StateStore) ClusterMetadata(ws memdb.WatchSet) (*structs.ClusterMetadata, error) { 6148 txn := s.db.ReadTxn() 6149 defer txn.Abort() 6150 6151 // Get the cluster metadata 6152 watchCh, m, err := txn.FirstWatch("cluster_meta", "id") 6153 if err != nil { 6154 return nil, fmt.Errorf("failed cluster metadata lookup: %w", err) 6155 } 6156 ws.Add(watchCh) 6157 6158 if m != nil { 6159 return m.(*structs.ClusterMetadata), nil 6160 } 6161 6162 return nil, nil 6163 } 6164 6165 func (s *StateStore) ClusterSetMetadata(index uint64, meta *structs.ClusterMetadata) error { 6166 txn := s.db.WriteTxn(index) 6167 defer txn.Abort() 6168 6169 if err := s.setClusterMetadata(txn, meta); err != nil { 6170 return fmt.Errorf("set cluster metadata failed: %w", err) 6171 } 6172 6173 return txn.Commit() 6174 } 6175 6176 // WithWriteTransaction executes the passed function within a write transaction, 6177 // and returns its result. If the invocation returns no error, the transaction 6178 // is committed; otherwise, it's aborted. 6179 func (s *StateStore) WithWriteTransaction(msgType structs.MessageType, index uint64, fn func(Txn) error) error { 6180 tx := s.db.WriteTxnMsgT(msgType, index) 6181 defer tx.Abort() 6182 6183 err := fn(tx) 6184 if err == nil { 6185 return tx.Commit() 6186 } 6187 return err 6188 } 6189 6190 // SchedulerCASConfig is used to update the scheduler configuration with a 6191 // given Raft index. If the CAS index specified is not equal to the last observed index 6192 // for the config, then the call is a noop. 6193 func (s *StateStore) SchedulerCASConfig(index, cidx uint64, config *structs.SchedulerConfiguration) (bool, error) { 6194 tx := s.db.WriteTxn(index) 6195 defer tx.Abort() 6196 6197 // Check for an existing config 6198 existing, err := tx.First("scheduler_config", "id") 6199 if err != nil { 6200 return false, fmt.Errorf("failed scheduler config lookup: %s", err) 6201 } 6202 6203 // If the existing index does not match the provided CAS 6204 // index arg, then we shouldn't update anything and can safely 6205 // return early here. 6206 e, ok := existing.(*structs.SchedulerConfiguration) 6207 if !ok || (e != nil && e.ModifyIndex != cidx) { 6208 return false, nil 6209 } 6210 6211 s.schedulerSetConfigTxn(index, tx, config) 6212 6213 if err := tx.Commit(); err != nil { 6214 return false, err 6215 } 6216 return true, nil 6217 } 6218 6219 func (s *StateStore) schedulerSetConfigTxn(idx uint64, tx *txn, config *structs.SchedulerConfiguration) error { 6220 // Check for an existing config 6221 existing, err := tx.First("scheduler_config", "id") 6222 if err != nil { 6223 return fmt.Errorf("failed scheduler config lookup: %s", err) 6224 } 6225 6226 // Set the indexes. 
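// On update, preserve the original CreateIndex and only bump ModifyIndex to
// the Raft index being applied; on first write both are set to idx.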
6227 if existing != nil { 6228 config.CreateIndex = existing.(*structs.SchedulerConfiguration).CreateIndex 6229 } else { 6230 config.CreateIndex = idx 6231 } 6232 config.ModifyIndex = idx 6233 6234 if err := tx.Insert("scheduler_config", config); err != nil { 6235 return fmt.Errorf("failed updating scheduler config: %s", err) 6236 } 6237 return nil 6238 } 6239 6240 func (s *StateStore) setClusterMetadata(txn *txn, meta *structs.ClusterMetadata) error { 6241 // Check for an existing config, if it exists, verify that the cluster ID matches 6242 existing, err := txn.First("cluster_meta", "id") 6243 if err != nil { 6244 return fmt.Errorf("failed cluster meta lookup: %v", err) 6245 } 6246 6247 if existing != nil { 6248 existingClusterID := existing.(*structs.ClusterMetadata).ClusterID 6249 if meta.ClusterID != existingClusterID && existingClusterID != "" { 6250 // there is a bug in cluster ID detection 6251 return fmt.Errorf("refusing to set new cluster id, previous: %s, new: %s", existingClusterID, meta.ClusterID) 6252 } 6253 } 6254 6255 // update is technically a noop, unless someday we add more / mutable fields 6256 if err := txn.Insert("cluster_meta", meta); err != nil { 6257 return fmt.Errorf("set cluster metadata failed: %v", err) 6258 } 6259 6260 return nil 6261 } 6262 6263 // UpsertScalingPolicies is used to insert a new scaling policy. 6264 func (s *StateStore) UpsertScalingPolicies(index uint64, scalingPolicies []*structs.ScalingPolicy) error { 6265 txn := s.db.WriteTxn(index) 6266 defer txn.Abort() 6267 6268 if err := s.UpsertScalingPoliciesTxn(index, scalingPolicies, txn); err != nil { 6269 return err 6270 } 6271 6272 return txn.Commit() 6273 } 6274 6275 // UpsertScalingPoliciesTxn is used to insert a new scaling policy. 6276 func (s *StateStore) UpsertScalingPoliciesTxn(index uint64, scalingPolicies []*structs.ScalingPolicy, 6277 txn *txn) error { 6278 6279 hadUpdates := false 6280 6281 for _, policy := range scalingPolicies { 6282 // Check if the scaling policy already exists 6283 // Policy uniqueness is based on target and type 6284 it, err := txn.Get("scaling_policy", "target", 6285 policy.Target[structs.ScalingTargetNamespace], 6286 policy.Target[structs.ScalingTargetJob], 6287 policy.Target[structs.ScalingTargetGroup], 6288 policy.Target[structs.ScalingTargetTask], 6289 ) 6290 if err != nil { 6291 return fmt.Errorf("scaling policy lookup failed: %v", err) 6292 } 6293 6294 // Check if type matches 6295 var existing *structs.ScalingPolicy 6296 for raw := it.Next(); raw != nil; raw = it.Next() { 6297 p := raw.(*structs.ScalingPolicy) 6298 if p.Type == policy.Type { 6299 existing = p 6300 break 6301 } 6302 } 6303 6304 // Setup the indexes correctly 6305 if existing != nil { 6306 if !existing.Diff(policy) { 6307 continue 6308 } 6309 policy.ID = existing.ID 6310 policy.CreateIndex = existing.CreateIndex 6311 } else { 6312 // policy.ID must have been set already in Job.Register before log apply 6313 policy.CreateIndex = index 6314 } 6315 policy.ModifyIndex = index 6316 6317 // Insert the scaling policy 6318 hadUpdates = true 6319 if err := txn.Insert("scaling_policy", policy); err != nil { 6320 return err 6321 } 6322 } 6323 6324 // Update the indexes table for scaling policy if we updated any policies 6325 if hadUpdates { 6326 if err := txn.Insert("index", &IndexEntry{"scaling_policy", index}); err != nil { 6327 return fmt.Errorf("index update failed: %v", err) 6328 } 6329 } 6330 6331 return nil 6332 } 6333 6334 // NamespaceByName is used to lookup a namespace by name 6335 func (s 
*StateStore) NamespaceByName(ws memdb.WatchSet, name string) (*structs.Namespace, error) { 6336 txn := s.db.ReadTxn() 6337 return s.namespaceByNameImpl(ws, txn, name) 6338 } 6339 6340 // namespaceByNameImpl is used to lookup a namespace by name 6341 func (s *StateStore) namespaceByNameImpl(ws memdb.WatchSet, txn *txn, name string) (*structs.Namespace, error) { 6342 watchCh, existing, err := txn.FirstWatch(TableNamespaces, "id", name) 6343 if err != nil { 6344 return nil, fmt.Errorf("namespace lookup failed: %v", err) 6345 } 6346 ws.Add(watchCh) 6347 6348 if existing != nil { 6349 return existing.(*structs.Namespace), nil 6350 } 6351 return nil, nil 6352 } 6353 6354 // namespaceExists returns whether a namespace exists 6355 func (s *StateStore) namespaceExists(txn *txn, namespace string) (bool, error) { 6356 if namespace == structs.DefaultNamespace { 6357 return true, nil 6358 } 6359 6360 existing, err := txn.First(TableNamespaces, "id", namespace) 6361 if err != nil { 6362 return false, fmt.Errorf("namespace lookup failed: %v", err) 6363 } 6364 6365 return existing != nil, nil 6366 } 6367 6368 // NamespacesByNamePrefix is used to lookup namespaces by prefix 6369 func (s *StateStore) NamespacesByNamePrefix(ws memdb.WatchSet, namePrefix string) (memdb.ResultIterator, error) { 6370 txn := s.db.ReadTxn() 6371 6372 iter, err := txn.Get(TableNamespaces, "id_prefix", namePrefix) 6373 if err != nil { 6374 return nil, fmt.Errorf("namespaces lookup failed: %v", err) 6375 } 6376 ws.Add(iter.WatchCh()) 6377 6378 return iter, nil 6379 } 6380 6381 // Namespaces returns an iterator over all the namespaces 6382 func (s *StateStore) Namespaces(ws memdb.WatchSet) (memdb.ResultIterator, error) { 6383 txn := s.db.ReadTxn() 6384 6385 // Walk the entire namespace table 6386 iter, err := txn.Get(TableNamespaces, "id") 6387 if err != nil { 6388 return nil, err 6389 } 6390 ws.Add(iter.WatchCh()) 6391 return iter, nil 6392 } 6393 6394 func (s *StateStore) NamespaceNames() ([]string, error) { 6395 it, err := s.Namespaces(nil) 6396 if err != nil { 6397 return nil, err 6398 } 6399 6400 nses := []string{} 6401 for { 6402 next := it.Next() 6403 if next == nil { 6404 break 6405 } 6406 ns := next.(*structs.Namespace) 6407 nses = append(nses, ns.Name) 6408 } 6409 6410 return nses, nil 6411 } 6412 6413 // UpsertNamespaces is used to register or update a set of namespaces. 6414 func (s *StateStore) UpsertNamespaces(index uint64, namespaces []*structs.Namespace) error { 6415 txn := s.db.WriteTxn(index) 6416 defer txn.Abort() 6417 6418 for _, ns := range namespaces { 6419 if err := s.upsertNamespaceImpl(index, txn, ns); err != nil { 6420 return err 6421 } 6422 } 6423 6424 if err := txn.Insert("index", &IndexEntry{TableNamespaces, index}); err != nil { 6425 return fmt.Errorf("index update failed: %v", err) 6426 } 6427 6428 return txn.Commit() 6429 } 6430 6431 // upsertNamespaceImpl is used to upsert a namespace 6432 func (s *StateStore) upsertNamespaceImpl(index uint64, txn *txn, namespace *structs.Namespace) error { 6433 // Ensure the namespace hash is non-nil. This should be done outside the state store 6434 // for performance reasons, but we check here for defense in depth. 
6435 ns := namespace 6436 if len(ns.Hash) == 0 { 6437 ns.SetHash() 6438 } 6439 6440 // Check if the namespace already exists 6441 existing, err := txn.First(TableNamespaces, "id", ns.Name) 6442 if err != nil { 6443 return fmt.Errorf("namespace lookup failed: %v", err) 6444 } 6445 6446 // Setup the indexes correctly and determine which quotas need to be 6447 // reconciled 6448 var oldQuota string 6449 if existing != nil { 6450 exist := existing.(*structs.Namespace) 6451 ns.CreateIndex = exist.CreateIndex 6452 ns.ModifyIndex = index 6453 6454 // Grab the old quota on the namespace 6455 oldQuota = exist.Quota 6456 } else { 6457 ns.CreateIndex = index 6458 ns.ModifyIndex = index 6459 } 6460 6461 // Validate that the quota on the new namespace exists 6462 if ns.Quota != "" { 6463 exists, err := s.quotaSpecExists(txn, ns.Quota) 6464 if err != nil { 6465 return fmt.Errorf("looking up namespace quota %q failed: %v", ns.Quota, err) 6466 } else if !exists { 6467 return fmt.Errorf("namespace %q using non-existent quota %q", ns.Name, ns.Quota) 6468 } 6469 } 6470 6471 // Insert the namespace 6472 if err := txn.Insert(TableNamespaces, ns); err != nil { 6473 return fmt.Errorf("namespace insert failed: %v", err) 6474 } 6475 6476 // Reconcile changed quotas 6477 return s.quotaReconcile(index, txn, ns.Quota, oldQuota) 6478 } 6479 6480 // DeleteNamespaces is used to remove a set of namespaces 6481 func (s *StateStore) DeleteNamespaces(index uint64, names []string) error { 6482 txn := s.db.WriteTxn(index) 6483 defer txn.Abort() 6484 6485 for _, name := range names { 6486 // Lookup the namespace 6487 existing, err := txn.First(TableNamespaces, "id", name) 6488 if err != nil { 6489 return fmt.Errorf("namespace lookup failed: %v", err) 6490 } 6491 if existing == nil { 6492 return fmt.Errorf("namespace not found") 6493 } 6494 6495 ns := existing.(*structs.Namespace) 6496 if ns.Name == structs.DefaultNamespace { 6497 return fmt.Errorf("default namespace can not be deleted") 6498 } 6499 6500 // Ensure that the namespace doesn't have any non-terminal jobs 6501 iter, err := s.jobsByNamespaceImpl(nil, name, txn) 6502 if err != nil { 6503 return err 6504 } 6505 6506 for { 6507 raw := iter.Next() 6508 if raw == nil { 6509 break 6510 } 6511 job := raw.(*structs.Job) 6512 6513 if job.Status != structs.JobStatusDead { 6514 return fmt.Errorf("namespace %q contains at least one non-terminal job %q. "+ 6515 "All jobs must be terminal in namespace before it can be deleted", name, job.ID) 6516 } 6517 } 6518 6519 vIter, err := s.csiVolumesByNamespaceImpl(txn, nil, name, "") 6520 if err != nil { 6521 return err 6522 } 6523 rawVol := vIter.Next() 6524 if rawVol != nil { 6525 vol := rawVol.(*structs.CSIVolume) 6526 return fmt.Errorf("namespace %q contains at least one CSI volume %q. "+ 6527 "All CSI volumes in namespace must be deleted before it can be deleted", name, vol.ID) 6528 } 6529 6530 varIter, err := s.getVariablesByNamespaceImpl(txn, nil, name) 6531 if err != nil { 6532 return err 6533 } 6534 if varIter.Next() != nil { 6535 // unlike job/volume, don't show the path here because the user may 6536 // not have List permissions on the vars in this namespace 6537 return fmt.Errorf("namespace %q contains at least one variable. 
"+ 6538 "All variables in namespace must be deleted before it can be deleted", name) 6539 } 6540 6541 // Delete the namespace 6542 if err := txn.Delete(TableNamespaces, existing); err != nil { 6543 return fmt.Errorf("namespace deletion failed: %v", err) 6544 } 6545 } 6546 6547 if err := txn.Insert("index", &IndexEntry{TableNamespaces, index}); err != nil { 6548 return fmt.Errorf("index update failed: %v", err) 6549 } 6550 6551 return txn.Commit() 6552 } 6553 6554 func (s *StateStore) DeleteScalingPolicies(index uint64, ids []string) error { 6555 txn := s.db.WriteTxn(index) 6556 defer txn.Abort() 6557 6558 err := s.DeleteScalingPoliciesTxn(index, ids, txn) 6559 if err == nil { 6560 return txn.Commit() 6561 } 6562 6563 return err 6564 } 6565 6566 // DeleteScalingPoliciesTxn is used to delete a set of scaling policies by ID. 6567 func (s *StateStore) DeleteScalingPoliciesTxn(index uint64, ids []string, txn *txn) error { 6568 if len(ids) == 0 { 6569 return nil 6570 } 6571 6572 for _, id := range ids { 6573 // Lookup the scaling policy 6574 existing, err := txn.First("scaling_policy", "id", id) 6575 if err != nil { 6576 return fmt.Errorf("scaling policy lookup failed: %v", err) 6577 } 6578 if existing == nil { 6579 return fmt.Errorf("scaling policy not found") 6580 } 6581 6582 // Delete the scaling policy 6583 if err := txn.Delete("scaling_policy", existing); err != nil { 6584 return fmt.Errorf("scaling policy delete failed: %v", err) 6585 } 6586 } 6587 6588 if err := txn.Insert("index", &IndexEntry{"scaling_policy", index}); err != nil { 6589 return fmt.Errorf("index update failed: %v", err) 6590 } 6591 6592 return nil 6593 } 6594 6595 // ScalingPolicies returns an iterator over all the scaling policies 6596 func (s *StateStore) ScalingPolicies(ws memdb.WatchSet) (memdb.ResultIterator, error) { 6597 txn := s.db.ReadTxn() 6598 6599 // Walk the entire scaling_policy table 6600 iter, err := txn.Get("scaling_policy", "id") 6601 if err != nil { 6602 return nil, err 6603 } 6604 6605 ws.Add(iter.WatchCh()) 6606 6607 return iter, nil 6608 } 6609 6610 // ScalingPoliciesByTypePrefix returns an iterator over scaling policies with a certain type prefix. 
6611 func (s *StateStore) ScalingPoliciesByTypePrefix(ws memdb.WatchSet, t string) (memdb.ResultIterator, error) { 6612 txn := s.db.ReadTxn() 6613 6614 iter, err := txn.Get("scaling_policy", "type_prefix", t) 6615 if err != nil { 6616 return nil, err 6617 } 6618 6619 ws.Add(iter.WatchCh()) 6620 return iter, nil 6621 } 6622 6623 func (s *StateStore) ScalingPoliciesByNamespace(ws memdb.WatchSet, namespace, typ string) (memdb.ResultIterator, error) { 6624 txn := s.db.ReadTxn() 6625 6626 iter, err := txn.Get("scaling_policy", "target_prefix", namespace) 6627 if err != nil { 6628 return nil, err 6629 } 6630 6631 ws.Add(iter.WatchCh()) 6632 6633 // Wrap the iterator in a filter to exact match the namespace 6634 iter = memdb.NewFilterIterator(iter, scalingPolicyNamespaceFilter(namespace)) 6635 6636 // If policy type is specified as well, wrap again 6637 if typ != "" { 6638 iter = memdb.NewFilterIterator(iter, func(raw interface{}) bool { 6639 p, ok := raw.(*structs.ScalingPolicy) 6640 if !ok { 6641 return true 6642 } 6643 return !strings.HasPrefix(p.Type, typ) 6644 }) 6645 } 6646 6647 return iter, nil 6648 } 6649 6650 func (s *StateStore) ScalingPoliciesByJob(ws memdb.WatchSet, namespace, jobID, policyType string) (memdb.ResultIterator, 6651 error) { 6652 txn := s.db.ReadTxn() 6653 iter, err := s.ScalingPoliciesByJobTxn(ws, namespace, jobID, txn) 6654 if err != nil { 6655 return nil, err 6656 } 6657 6658 if policyType == "" { 6659 return iter, nil 6660 } 6661 6662 filter := func(raw interface{}) bool { 6663 p, ok := raw.(*structs.ScalingPolicy) 6664 if !ok { 6665 return true 6666 } 6667 return policyType != p.Type 6668 } 6669 6670 return memdb.NewFilterIterator(iter, filter), nil 6671 } 6672 6673 func (s *StateStore) ScalingPoliciesByJobTxn(ws memdb.WatchSet, namespace, jobID string, 6674 txn *txn) (memdb.ResultIterator, error) { 6675 6676 iter, err := txn.Get("scaling_policy", "target_prefix", namespace, jobID) 6677 if err != nil { 6678 return nil, err 6679 } 6680 6681 ws.Add(iter.WatchCh()) 6682 6683 filter := func(raw interface{}) bool { 6684 d, ok := raw.(*structs.ScalingPolicy) 6685 if !ok { 6686 return true 6687 } 6688 6689 return d.Target[structs.ScalingTargetJob] != jobID 6690 } 6691 6692 // Wrap the iterator in a filter 6693 wrap := memdb.NewFilterIterator(iter, filter) 6694 return wrap, nil 6695 } 6696 6697 func (s *StateStore) ScalingPolicyByID(ws memdb.WatchSet, id string) (*structs.ScalingPolicy, error) { 6698 txn := s.db.ReadTxn() 6699 6700 watchCh, existing, err := txn.FirstWatch("scaling_policy", "id", id) 6701 if err != nil { 6702 return nil, fmt.Errorf("scaling_policy lookup failed: %v", err) 6703 } 6704 ws.Add(watchCh) 6705 6706 if existing != nil { 6707 return existing.(*structs.ScalingPolicy), nil 6708 } 6709 6710 return nil, nil 6711 } 6712 6713 // ScalingPolicyByTargetAndType returns a fully-qualified policy against a target and policy type, 6714 // or nil if it does not exist. This method does not honor the watchset on the policy type, just the target. 
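//
// Example lookup (a sketch; the target keys mirror the ones used by
// UpsertScalingPoliciesTxn above and the values are made up):
//
//	target := map[string]string{
//		structs.ScalingTargetNamespace: "default",
//		structs.ScalingTargetJob:       "example",
//		structs.ScalingTargetGroup:     "web",
//		structs.ScalingTargetTask:      "",
//	}
//	p, err := s.ScalingPolicyByTargetAndType(ws, target, "horizontal")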
6715 func (s *StateStore) ScalingPolicyByTargetAndType(ws memdb.WatchSet, target map[string]string, typ string) (*structs.ScalingPolicy, 6716 error) { 6717 txn := s.db.ReadTxn() 6718 6719 namespace := target[structs.ScalingTargetNamespace] 6720 job := target[structs.ScalingTargetJob] 6721 group := target[structs.ScalingTargetGroup] 6722 task := target[structs.ScalingTargetTask] 6723 6724 it, err := txn.Get("scaling_policy", "target", namespace, job, group, task) 6725 if err != nil { 6726 return nil, fmt.Errorf("scaling_policy lookup failed: %v", err) 6727 } 6728 6729 ws.Add(it.WatchCh()) 6730 6731 // Check for type 6732 var existing *structs.ScalingPolicy 6733 for raw := it.Next(); raw != nil; raw = it.Next() { 6734 p := raw.(*structs.ScalingPolicy) 6735 if p.Type == typ { 6736 existing = p 6737 break 6738 } 6739 } 6740 6741 if existing != nil { 6742 return existing, nil 6743 } 6744 6745 return nil, nil 6746 } 6747 6748 func (s *StateStore) ScalingPoliciesByIDPrefix(ws memdb.WatchSet, namespace string, prefix string) (memdb.ResultIterator, error) { 6749 txn := s.db.ReadTxn() 6750 6751 iter, err := txn.Get("scaling_policy", "id_prefix", prefix) 6752 if err != nil { 6753 return nil, fmt.Errorf("scaling policy lookup failed: %v", err) 6754 } 6755 6756 ws.Add(iter.WatchCh()) 6757 6758 iter = memdb.NewFilterIterator(iter, scalingPolicyNamespaceFilter(namespace)) 6759 6760 return iter, nil 6761 } 6762 6763 // scalingPolicyNamespaceFilter returns a filter function that filters all 6764 // scaling policies not targeting the given namespace. 6765 func scalingPolicyNamespaceFilter(namespace string) func(interface{}) bool { 6766 return func(raw interface{}) bool { 6767 p, ok := raw.(*structs.ScalingPolicy) 6768 if !ok { 6769 return true 6770 } 6771 6772 return p.Target[structs.ScalingTargetNamespace] != namespace 6773 } 6774 } 6775 6776 // StateSnapshot is used to provide a point-in-time snapshot 6777 type StateSnapshot struct { 6778 StateStore 6779 } 6780 6781 // DenormalizeAllocationsMap takes in a map of nodes to allocations, and queries the 6782 // Allocation for each of the Allocation diffs and merges the updated attributes with 6783 // the existing Allocation, and attaches the Job provided 6784 func (s *StateSnapshot) DenormalizeAllocationsMap(nodeAllocations map[string][]*structs.Allocation) error { 6785 for nodeID, allocs := range nodeAllocations { 6786 denormalizedAllocs, err := s.DenormalizeAllocationSlice(allocs) 6787 if err != nil { 6788 return err 6789 } 6790 6791 nodeAllocations[nodeID] = denormalizedAllocs 6792 } 6793 return nil 6794 } 6795 6796 // DenormalizeAllocationSlice queries the Allocation for each allocation diff 6797 // represented as an Allocation and merges the updated attributes with the existing 6798 // Allocation, and attaches the Job provided. 6799 // 6800 // This should only be called on terminal allocs, particularly stopped or preempted allocs 6801 func (s *StateSnapshot) DenormalizeAllocationSlice(allocs []*structs.Allocation) ([]*structs.Allocation, error) { 6802 allocDiffs := make([]*structs.AllocationDiff, len(allocs)) 6803 for i, alloc := range allocs { 6804 allocDiffs[i] = alloc.AllocationDiff() 6805 } 6806 6807 return s.DenormalizeAllocationDiffSlice(allocDiffs) 6808 } 6809 6810 // DenormalizeAllocationDiffSlice queries the Allocation for each AllocationDiff and merges 6811 // the updated attributes with the existing Allocation, and attaches the Job provided. 
6812 // 6813 // This should only be called on terminal alloc, particularly stopped or preempted allocs 6814 func (s *StateSnapshot) DenormalizeAllocationDiffSlice(allocDiffs []*structs.AllocationDiff) ([]*structs.Allocation, error) { 6815 // Output index for denormalized Allocations 6816 j := 0 6817 6818 denormalizedAllocs := make([]*structs.Allocation, len(allocDiffs)) 6819 for _, allocDiff := range allocDiffs { 6820 alloc, err := s.AllocByID(nil, allocDiff.ID) 6821 if err != nil { 6822 return nil, fmt.Errorf("alloc lookup failed: %v", err) 6823 } 6824 if alloc == nil { 6825 return nil, fmt.Errorf("alloc %v doesn't exist", allocDiff.ID) 6826 } 6827 6828 // Merge the updates to the Allocation. Don't update alloc.Job for terminal allocs 6829 // so alloc refers to the latest Job view before destruction and to ease handler implementations 6830 allocCopy := alloc.Copy() 6831 6832 if allocDiff.PreemptedByAllocation != "" { 6833 allocCopy.PreemptedByAllocation = allocDiff.PreemptedByAllocation 6834 allocCopy.DesiredDescription = getPreemptedAllocDesiredDescription(allocDiff.PreemptedByAllocation) 6835 allocCopy.DesiredStatus = structs.AllocDesiredStatusEvict 6836 } else { 6837 // If alloc is a stopped alloc 6838 allocCopy.DesiredDescription = allocDiff.DesiredDescription 6839 allocCopy.DesiredStatus = structs.AllocDesiredStatusStop 6840 if allocDiff.ClientStatus != "" { 6841 allocCopy.ClientStatus = allocDiff.ClientStatus 6842 } 6843 if allocDiff.FollowupEvalID != "" { 6844 allocCopy.FollowupEvalID = allocDiff.FollowupEvalID 6845 } 6846 } 6847 if allocDiff.ModifyTime != 0 { 6848 allocCopy.ModifyTime = allocDiff.ModifyTime 6849 } 6850 6851 // Update the allocDiff in the slice to equal the denormalized alloc 6852 denormalizedAllocs[j] = allocCopy 6853 j++ 6854 } 6855 // Retain only the denormalized Allocations in the slice 6856 denormalizedAllocs = denormalizedAllocs[:j] 6857 return denormalizedAllocs, nil 6858 } 6859 6860 func getPreemptedAllocDesiredDescription(preemptedByAllocID string) string { 6861 return fmt.Sprintf("Preempted by alloc ID %v", preemptedByAllocID) 6862 } 6863 6864 // UpsertRootKeyMeta saves root key meta or updates it in-place. 6865 func (s *StateStore) UpsertRootKeyMeta(index uint64, rootKeyMeta *structs.RootKeyMeta, rekey bool) error { 6866 txn := s.db.WriteTxn(index) 6867 defer txn.Abort() 6868 6869 // get any existing key for updating 6870 raw, err := txn.First(TableRootKeyMeta, indexID, rootKeyMeta.KeyID) 6871 if err != nil { 6872 return fmt.Errorf("root key metadata lookup failed: %v", err) 6873 } 6874 6875 isRotation := false 6876 6877 if raw != nil { 6878 existing := raw.(*structs.RootKeyMeta) 6879 rootKeyMeta.CreateIndex = existing.CreateIndex 6880 rootKeyMeta.CreateTime = existing.CreateTime 6881 isRotation = !existing.Active() && rootKeyMeta.Active() 6882 } else { 6883 rootKeyMeta.CreateIndex = index 6884 isRotation = rootKeyMeta.Active() 6885 } 6886 rootKeyMeta.ModifyIndex = index 6887 6888 if rekey && !isRotation { 6889 return fmt.Errorf("cannot rekey without setting the new key active") 6890 } 6891 6892 // if the upsert is for a newly-active key, we need to set all the 6893 // other keys as inactive in the same transaction. 
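// The transitions applied below are: an active key becomes inactive (or
// rekeying when rekey is set), an inactive key is flipped to rekeying only
// when rekey is set, and keys already rekeying or deprecated are left as-is.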
6894 if isRotation { 6895 iter, err := txn.Get(TableRootKeyMeta, indexID) 6896 if err != nil { 6897 return err 6898 } 6899 for { 6900 raw := iter.Next() 6901 if raw == nil { 6902 break 6903 } 6904 key := raw.(*structs.RootKeyMeta) 6905 modified := false 6906 6907 switch key.State { 6908 case structs.RootKeyStateInactive: 6909 if rekey { 6910 key.SetRekeying() 6911 modified = true 6912 } 6913 case structs.RootKeyStateActive: 6914 if rekey { 6915 key.SetRekeying() 6916 } else { 6917 key.SetInactive() 6918 } 6919 modified = true 6920 case structs.RootKeyStateRekeying, structs.RootKeyStateDeprecated: 6921 // nothing to do 6922 } 6923 6924 if modified { 6925 key.ModifyIndex = index 6926 if err := txn.Insert(TableRootKeyMeta, key); err != nil { 6927 return err 6928 } 6929 } 6930 6931 } 6932 } 6933 6934 if err := txn.Insert(TableRootKeyMeta, rootKeyMeta); err != nil { 6935 return err 6936 } 6937 6938 // update the indexes table 6939 if err := txn.Insert("index", &IndexEntry{TableRootKeyMeta, index}); err != nil { 6940 return fmt.Errorf("index update failed: %v", err) 6941 } 6942 return txn.Commit() 6943 } 6944 6945 // DeleteRootKeyMeta deletes a single root key, or returns an error if 6946 // it doesn't exist. 6947 func (s *StateStore) DeleteRootKeyMeta(index uint64, keyID string) error { 6948 txn := s.db.WriteTxn(index) 6949 defer txn.Abort() 6950 6951 // find the old key 6952 existing, err := txn.First(TableRootKeyMeta, indexID, keyID) 6953 if err != nil { 6954 return fmt.Errorf("root key metadata lookup failed: %v", err) 6955 } 6956 if existing == nil { 6957 return fmt.Errorf("root key metadata not found") 6958 } 6959 if err := txn.Delete(TableRootKeyMeta, existing); err != nil { 6960 return fmt.Errorf("root key metadata delete failed: %v", err) 6961 } 6962 6963 // update the indexes table 6964 if err := txn.Insert("index", &IndexEntry{TableRootKeyMeta, index}); err != nil { 6965 return fmt.Errorf("index update failed: %v", err) 6966 } 6967 6968 return txn.Commit() 6969 } 6970 6971 // RootKeyMetas returns an iterator over all root key metadata 6972 func (s *StateStore) RootKeyMetas(ws memdb.WatchSet) (memdb.ResultIterator, error) { 6973 txn := s.db.ReadTxn() 6974 6975 iter, err := txn.Get(TableRootKeyMeta, indexID) 6976 if err != nil { 6977 return nil, err 6978 } 6979 6980 ws.Add(iter.WatchCh()) 6981 return iter, nil 6982 } 6983 6984 // RootKeyMetaByID returns a specific root key meta 6985 func (s *StateStore) RootKeyMetaByID(ws memdb.WatchSet, id string) (*structs.RootKeyMeta, error) { 6986 txn := s.db.ReadTxn() 6987 6988 watchCh, raw, err := txn.FirstWatch(TableRootKeyMeta, indexID, id) 6989 if err != nil { 6990 return nil, fmt.Errorf("root key metadata lookup failed: %v", err) 6991 } 6992 ws.Add(watchCh) 6993 6994 if raw != nil { 6995 return raw.(*structs.RootKeyMeta), nil 6996 } 6997 return nil, nil 6998 } 6999 7000 // GetActiveRootKeyMeta returns the metadata for the currently active root key 7001 func (s *StateStore) GetActiveRootKeyMeta(ws memdb.WatchSet) (*structs.RootKeyMeta, error) { 7002 txn := s.db.ReadTxn() 7003 7004 iter, err := txn.Get(TableRootKeyMeta, indexID) 7005 if err != nil { 7006 return nil, err 7007 } 7008 ws.Add(iter.WatchCh()) 7009 7010 for { 7011 raw := iter.Next() 7012 if raw == nil { 7013 break 7014 } 7015 key := raw.(*structs.RootKeyMeta) 7016 if key.Active() { 7017 return key, nil 7018 } 7019 } 7020 return nil, nil 7021 } 7022 7023 // IsRootKeyMetaInUse determines whether a key has been used to sign a workload 7024 // identity for a live allocation or encrypt any 
variables 7025 func (s *StateStore) IsRootKeyMetaInUse(keyID string) (bool, error) { 7026 txn := s.db.ReadTxn() 7027 7028 iter, err := txn.Get(TableAllocs, indexSigningKey, keyID, true) 7029 if err != nil { 7030 return false, err 7031 } 7032 alloc := iter.Next() 7033 if alloc != nil { 7034 return true, nil 7035 } 7036 7037 iter, err = txn.Get(TableVariables, indexKeyID, keyID) 7038 if err != nil { 7039 return false, err 7040 } 7041 variable := iter.Next() 7042 if variable != nil { 7043 return true, nil 7044 } 7045 7046 return false, nil 7047 }
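
// Illustrative pairing of the root key helpers above (a sketch, not part of
// the original API surface): a key should only be deleted once nothing
// references it.
//
//	inUse, err := s.IsRootKeyMetaInUse(keyID)
//	if err != nil {
//		return err
//	}
//	if !inUse {
//		return s.DeleteRootKeyMeta(index, keyID)
//	}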