// github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/nomad/state/state_store.go

package state

import (
	"context"
	"fmt"
	"reflect"
	"sort"
	"strings"
	"time"

	log "github.com/hashicorp/go-hclog"
	memdb "github.com/hashicorp/go-memdb"
	multierror "github.com/hashicorp/go-multierror"
	"github.com/pkg/errors"

	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/nomad/stream"
	"github.com/hashicorp/nomad/nomad/structs"
)

// Txn is a transaction against a state store.
// This can be a read or write transaction.
type Txn = *txn

const (
	// NodeRegisterEventRegistered is the message used when the node is
	// registered for the first time.
	NodeRegisterEventRegistered = "Node registered"

	// NodeRegisterEventReregistered is the message used when the node becomes
	// re-registered.
	NodeRegisterEventReregistered = "Node re-registered"
)

// IndexEntry is used with the "index" table
// for managing the latest Raft index affecting a table.
type IndexEntry struct {
	Key   string
	Value uint64
}

// StateStoreConfig is used to configure a new state store
type StateStoreConfig struct {
	// Logger is used to output the state store's logs
	Logger log.Logger

	// Region is the region of the server embedding the state store.
	Region string

	// EnablePublisher is used to enable or disable the event publisher
	EnablePublisher bool

	// EventBufferSize configures the number of events to hold in memory
	EventBufferSize int64
}

// The StateStore is responsible for maintaining all the Nomad
// state. It is manipulated by the FSM which maintains consistency
// through the use of Raft. The goals of the StateStore are to provide
// high concurrency for read operations without blocking writes, and
// to provide write availability in the face of reads. EVERY object
// returned as a result of a read against the state store should be
// considered a constant and NEVER modified in place.
type StateStore struct {
	logger log.Logger
	db     *changeTrackerDB

	// config is the passed in configuration
	config *StateStoreConfig

	// abandonCh is used to signal watchers that this state store has been
	// abandoned (usually during a restore). This is only ever closed.
	abandonCh chan struct{}

	// TODO: refactor abandonCh to use a context so that both can use the same
	// cancel mechanism.
	stopEventBroker func()
}

type streamACLDelegate struct {
	s *StateStore
}

func (a *streamACLDelegate) TokenProvider() stream.ACLTokenProvider {
	resolver, _ := a.s.Snapshot()
	return resolver
}

// NewStateStore is used to create a new state store
func NewStateStore(config *StateStoreConfig) (*StateStore, error) {
	// Create the MemDB
	db, err := memdb.NewMemDB(stateStoreSchema())
	if err != nil {
		return nil, fmt.Errorf("state store setup failed: %v", err)
	}

	// Create the state store
	ctx, cancel := context.WithCancel(context.TODO())
	s := &StateStore{
		logger:          config.Logger.Named("state_store"),
		config:          config,
		abandonCh:       make(chan struct{}),
		stopEventBroker: cancel,
	}

	if config.EnablePublisher {
		// Create new event publisher using provided config
		broker, err := stream.NewEventBroker(ctx, &streamACLDelegate{s}, stream.EventBrokerCfg{
			EventBufferSize: config.EventBufferSize,
			Logger:          config.Logger,
		})
		if err != nil {
			return nil, fmt.Errorf("creating state store event broker: %w", err)
		}
		s.db = NewChangeTrackerDB(db, broker, eventsFromChanges)
	} else {
		s.db = NewChangeTrackerDB(db, nil, noOpProcessChanges)
	}

	// Initialize the state store with the default namespace.
	if err := s.namespaceInit(); err != nil {
		return nil, fmt.Errorf("enterprise state store initialization failed: %v", err)
	}

	return s, nil
}

// NewWatchSet returns a new memdb.WatchSet that adds the state store's
// abandonCh as a watcher. This is important in that it will notify when this
// specific state store is no longer valid, usually due to a new snapshot
// being loaded.
func (s *StateStore) NewWatchSet() memdb.WatchSet {
	ws := memdb.NewWatchSet()
	ws.Add(s.AbandonCh())
	return ws
}

func (s *StateStore) EventBroker() (*stream.EventBroker, error) {
	if s.db.publisher == nil {
		return nil, fmt.Errorf("EventBroker not configured")
	}
	return s.db.publisher, nil
}

// namespaceInit ensures the default namespace exists.
func (s *StateStore) namespaceInit() error {
	// Create the default namespace. This is safe to do every time we create
	// the state store. There are two main cases: a brand new cluster, in
	// which case each server will have the same default namespace object; or
	// an existing cluster, in which case, if the default namespace has been
	// modified, it will be overridden by the restore code path.
	defaultNs := &structs.Namespace{
		Name:        structs.DefaultNamespace,
		Description: structs.DefaultNamespaceDescription,
	}

	if err := s.UpsertNamespaces(1, []*structs.Namespace{defaultNs}); err != nil {
		return fmt.Errorf("inserting default namespace failed: %v", err)
	}

	return nil
}

// Config returns the state store configuration.
func (s *StateStore) Config() *StateStoreConfig {
	return s.config
}
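// Example (editorial sketch, not part of the original file): constructing a
// StateStore with the event publisher enabled. The region, logger, and
// buffer size here are arbitrary assumptions; "log" is this file's alias for
// go-hclog.
//
//	store, err := NewStateStore(&StateStoreConfig{
//		Logger:          log.Default(),
//		Region:          "global",
//		EnablePublisher: true,
//		EventBufferSize: 100,
//	})
//	if err != nil {
//		// handle setup failure
//	}
//	defer store.StopEventBroker()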
// Snapshot is used to create a point in time snapshot. Because
// we use MemDB, we just need to snapshot the state of the underlying
// database.
func (s *StateStore) Snapshot() (*StateSnapshot, error) {
	memDBSnap := s.db.memdb.Snapshot()

	store := StateStore{
		logger: s.logger,
		config: s.config,
	}

	// Create a new change tracker DB that does not publish or track changes
	store.db = NewChangeTrackerDB(memDBSnap, nil, noOpProcessChanges)

	snap := &StateSnapshot{
		StateStore: store,
	}
	return snap, nil
}

// SnapshotMinIndex is used to create a state snapshot where the index is
// guaranteed to be greater than or equal to the index parameter.
//
// Some server operations (such as scheduling) exchange objects via RPC
// concurrent with Raft log application, so they must ensure the state store
// snapshot they are operating on is at or after the index the objects
// retrieved via RPC were applied to the Raft log at.
//
// Callers should maintain their own timer metric as the time this method
// blocks indicates Raft log application latency relative to scheduling.
func (s *StateStore) SnapshotMinIndex(ctx context.Context, index uint64) (*StateSnapshot, error) {
	// Ported from work.go:waitForIndex prior to 0.9

	const backoffBase = 20 * time.Millisecond
	const backoffLimit = 1 * time.Second
	var retries uint
	var retryTimer *time.Timer

	// XXX: Potential optimization is to set up a watch on the state
	// store's index table and only unblock via a trigger rather than
	// polling.
	for {
		// Get the state's current index
		snapshotIndex, err := s.LatestIndex()
		if err != nil {
			return nil, fmt.Errorf("failed to determine state store's index: %v", err)
		}

		// We only need the FSM state to be as recent as the given index
		if snapshotIndex >= index {
			return s.Snapshot()
		}

		// Exponential backoff
		retries++
		if retryTimer == nil {
			// First retry, start at baseline
			retryTimer = time.NewTimer(backoffBase)
		} else {
			// Subsequent retry, reset timer
			deadline := 1 << (2 * retries) * backoffBase
			if deadline > backoffLimit {
				deadline = backoffLimit
			}
			retryTimer.Reset(deadline)
		}

		select {
		case <-ctx.Done():
			return nil, ctx.Err()
		case <-retryTimer.C:
		}
	}
}

// Restore is used to optimize the efficiency of rebuilding
// state by minimizing the number of transactions and checking
// overhead.
func (s *StateStore) Restore() (*StateRestore, error) {
	txn := s.db.WriteTxnRestore()
	r := &StateRestore{
		txn: txn,
	}
	return r, nil
}

// AbandonCh returns a channel you can wait on to know if the state store was
// abandoned.
func (s *StateStore) AbandonCh() <-chan struct{} {
	return s.abandonCh
}

// Abandon is used to signal that the given state store has been abandoned.
// Calling this more than one time will panic.
func (s *StateStore) Abandon() {
	s.StopEventBroker()
	close(s.abandonCh)
}

// StopEventBroker calls the cancel func for the state store's event
// publisher. It should be called during server shutdown.
func (s *StateStore) StopEventBroker() {
	s.stopEventBroker()
}
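// Example (editorial sketch): blocking until the store has caught up to a
// given Raft index before taking a snapshot. The timeout and minIndex values
// are assumptions for illustration.
//
//	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
//	defer cancel()
//	snap, err := store.SnapshotMinIndex(ctx, minIndex)
//	if err != nil {
//		// the context expired before the store reached minIndex
//	}
//	// snap is an immutable point-in-time view at or after minIndex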
// QueryFn is the definition of a function that can be used to implement a basic
// blocking query against the state store.
type QueryFn func(memdb.WatchSet, *StateStore) (resp interface{}, index uint64, err error)

// BlockingQuery takes a query function and runs the function until the minimum
// query index is met or until the passed context is cancelled.
func (s *StateStore) BlockingQuery(query QueryFn, minIndex uint64, ctx context.Context) (
	resp interface{}, index uint64, err error) {

RUN_QUERY:
	// We capture the state store and its abandon channel but pass a snapshot to
	// the blocking query function. We operate on the snapshot to allow separate
	// calls to the state store not all wrapped within the same transaction.
	abandonCh := s.AbandonCh()
	snap, _ := s.Snapshot()
	stateSnap := &snap.StateStore

	// We can skip all watch tracking if this isn't a blocking query.
	var ws memdb.WatchSet
	if minIndex > 0 {
		ws = memdb.NewWatchSet()

		// This channel will be closed if a snapshot is restored and the
		// whole state store is abandoned.
		ws.Add(abandonCh)
	}

	resp, index, err = query(ws, stateSnap)
	if err != nil {
		return nil, index, err
	}

	// We haven't reached the min-index yet.
	if minIndex > 0 && index <= minIndex {
		if err := ws.WatchCtx(ctx); err != nil {
			return nil, index, err
		}

		goto RUN_QUERY
	}

	return resp, index, nil
}
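// Example (editorial sketch): a QueryFn that lists nodes and blocks until the
// nodes table advances past minIndex. Index is assumed to be the state
// store's table-index lookup helper defined later in this file; the variable
// names are assumptions for illustration.
//
//	queryNodes := func(ws memdb.WatchSet, state *StateStore) (interface{}, uint64, error) {
//		iter, err := state.Nodes(ws)
//		if err != nil {
//			return nil, 0, err
//		}
//		var nodes []*structs.Node
//		for raw := iter.Next(); raw != nil; raw = iter.Next() {
//			nodes = append(nodes, raw.(*structs.Node))
//		}
//		index, err := state.Index("nodes")
//		if err != nil {
//			return nil, 0, err
//		}
//		return nodes, index, nil
//	}
//	resp, idx, err := store.BlockingQuery(queryNodes, minIndex, ctx)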
// UpsertPlanResults is used to upsert the results of a plan.
func (s *StateStore) UpsertPlanResults(msgType structs.MessageType, index uint64, results *structs.ApplyPlanResultsRequest) error {
	snapshot, err := s.Snapshot()
	if err != nil {
		return err
	}

	allocsStopped, err := snapshot.DenormalizeAllocationDiffSlice(results.AllocsStopped)
	if err != nil {
		return err
	}

	allocsPreempted, err := snapshot.DenormalizeAllocationDiffSlice(results.AllocsPreempted)
	if err != nil {
		return err
	}

	// COMPAT 0.11: Remove this denormalization when NodePreemptions is removed
	results.NodePreemptions, err = snapshot.DenormalizeAllocationSlice(results.NodePreemptions)
	if err != nil {
		return err
	}

	txn := s.db.WriteTxnMsgT(msgType, index)
	defer txn.Abort()

	// Upsert the newly created or updated deployment
	if results.Deployment != nil {
		if err := s.upsertDeploymentImpl(index, results.Deployment, txn); err != nil {
			return err
		}
	}

	// Update the status of deployments affected by the plan.
	if len(results.DeploymentUpdates) != 0 {
		if err := s.upsertDeploymentUpdates(index, results.DeploymentUpdates, txn); err != nil {
			return err
		}
	}

	if results.EvalID != "" {
		// Update the modify index of the eval id
		if err := s.updateEvalModifyIndex(txn, index, results.EvalID); err != nil {
			return err
		}
	}

	numAllocs := 0
	if len(results.Alloc) > 0 || len(results.NodePreemptions) > 0 {
		// COMPAT 0.11: This branch will be removed, when Alloc is removed
		// Attach the job to all the allocations. It is pulled out in the payload to
		// avoid the redundancy of encoding, but should be denormalized prior to
		// being inserted into MemDB.
		addComputedAllocAttrs(results.Alloc, results.Job)
		numAllocs = len(results.Alloc) + len(results.NodePreemptions)
	} else {
		// Attach the job to all the allocations. It is pulled out in the payload to
		// avoid the redundancy of encoding, but should be denormalized prior to
		// being inserted into MemDB.
		addComputedAllocAttrs(results.AllocsUpdated, results.Job)
		numAllocs = len(allocsStopped) + len(results.AllocsUpdated) + len(allocsPreempted)
	}

	allocsToUpsert := make([]*structs.Allocation, 0, numAllocs)

	// COMPAT 0.11: Both these appends should be removed when Alloc and NodePreemptions are removed
	allocsToUpsert = append(allocsToUpsert, results.Alloc...)
	allocsToUpsert = append(allocsToUpsert, results.NodePreemptions...)

	allocsToUpsert = append(allocsToUpsert, allocsStopped...)
	allocsToUpsert = append(allocsToUpsert, results.AllocsUpdated...)
	allocsToUpsert = append(allocsToUpsert, allocsPreempted...)

	// handle upgrade path
	for _, alloc := range allocsToUpsert {
		alloc.Canonicalize()
	}

	if err := s.upsertAllocsImpl(index, allocsToUpsert, txn); err != nil {
		return err
	}

	// Upsert followup evals for allocs that were preempted
	for _, eval := range results.PreemptionEvals {
		if err := s.nestedUpsertEval(txn, index, eval); err != nil {
			return err
		}
	}

	return txn.Commit()
}

// addComputedAllocAttrs adds the computed/derived attributes to the allocation.
// This method is used when an allocation is being denormalized.
func addComputedAllocAttrs(allocs []*structs.Allocation, job *structs.Job) {
	structs.DenormalizeAllocationJobs(job, allocs)

	// COMPAT(0.11): Remove in 0.11
	// Calculate the total resources of allocations. It is pulled out in the
	// payload to avoid encoding something that can be computed, but should be
	// denormalized prior to being inserted into MemDB.
	for _, alloc := range allocs {
		if alloc.Resources != nil {
			continue
		}

		alloc.Resources = new(structs.Resources)
		for _, task := range alloc.TaskResources {
			alloc.Resources.Add(task)
		}

		// Add the shared resources
		alloc.Resources.Add(alloc.SharedResources)
	}
}

// upsertDeploymentUpdates updates the deployments given the passed status
// updates.
func (s *StateStore) upsertDeploymentUpdates(index uint64, updates []*structs.DeploymentStatusUpdate, txn *txn) error {
	for _, u := range updates {
		if err := s.updateDeploymentStatusImpl(index, u, txn); err != nil {
			return err
		}
	}

	return nil
}

// UpsertJobSummary upserts a job summary into the state store.
func (s *StateStore) UpsertJobSummary(index uint64, jobSummary *structs.JobSummary) error {
	txn := s.db.WriteTxn(index)
	defer txn.Abort()

	// Check if the job summary already exists
	existing, err := txn.First("job_summary", "id", jobSummary.Namespace, jobSummary.JobID)
	if err != nil {
		return fmt.Errorf("job summary lookup failed: %v", err)
	}

	// Setup the indexes correctly
	if existing != nil {
		jobSummary.CreateIndex = existing.(*structs.JobSummary).CreateIndex
		jobSummary.ModifyIndex = index
	} else {
		jobSummary.CreateIndex = index
		jobSummary.ModifyIndex = index
	}

	// Insert the job summary
	if err := txn.Insert("job_summary", jobSummary); err != nil {
		return err
	}

	// Update the indexes table for job summary
	if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}

	return txn.Commit()
}
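// Editorial note: the CreateIndex/ModifyIndex bookkeeping above is the
// convention used throughout this file. For example (indexes assumed for
// illustration):
//
//	_ = store.UpsertJobSummary(100, summary) // first write: CreateIndex=100, ModifyIndex=100
//	_ = store.UpsertJobSummary(250, summary) // re-upsert: CreateIndex stays 100, ModifyIndex=250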
// DeleteJobSummary deletes the job summary with the given ID. This is for
// testing purposes only.
func (s *StateStore) DeleteJobSummary(index uint64, namespace, id string) error {
	txn := s.db.WriteTxn(index)
	defer txn.Abort()

	// Delete the job summary
	if _, err := txn.DeleteAll("job_summary", "id", namespace, id); err != nil {
		return fmt.Errorf("deleting job summary failed: %v", err)
	}
	if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}
	return txn.Commit()
}

// UpsertDeployment is used to insert or update a deployment.
func (s *StateStore) UpsertDeployment(index uint64, deployment *structs.Deployment) error {
	txn := s.db.WriteTxn(index)
	defer txn.Abort()
	if err := s.upsertDeploymentImpl(index, deployment, txn); err != nil {
		return err
	}
	return txn.Commit()
}

func (s *StateStore) upsertDeploymentImpl(index uint64, deployment *structs.Deployment, txn *txn) error {
	// Check if the deployment already exists
	existing, err := txn.First("deployment", "id", deployment.ID)
	if err != nil {
		return fmt.Errorf("deployment lookup failed: %v", err)
	}

	// Setup the indexes correctly
	if existing != nil {
		deployment.CreateIndex = existing.(*structs.Deployment).CreateIndex
		deployment.ModifyIndex = index
	} else {
		deployment.CreateIndex = index
		deployment.ModifyIndex = index
	}

	// Insert the deployment
	if err := txn.Insert("deployment", deployment); err != nil {
		return err
	}

	// Update the indexes table for deployment
	if err := txn.Insert("index", &IndexEntry{"deployment", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}

	// If the deployment is being marked as complete, set the job to stable.
	if deployment.Status == structs.DeploymentStatusSuccessful {
		if err := s.updateJobStabilityImpl(index, deployment.Namespace, deployment.JobID, deployment.JobVersion, true, txn); err != nil {
			return fmt.Errorf("failed to update job stability: %v", err)
		}
	}

	return nil
}

func (s *StateStore) Deployments(ws memdb.WatchSet) (memdb.ResultIterator, error) {
	txn := s.db.ReadTxn()

	// Walk the entire deployments table
	iter, err := txn.Get("deployment", "id")
	if err != nil {
		return nil, err
	}

	ws.Add(iter.WatchCh())
	return iter, nil
}

func (s *StateStore) DeploymentsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) {
	txn := s.db.ReadTxn()

	// Walk the deployments table for the given namespace
	iter, err := txn.Get("deployment", "namespace", namespace)
	if err != nil {
		return nil, err
	}

	ws.Add(iter.WatchCh())
	return iter, nil
}

func (s *StateStore) DeploymentsByIDPrefix(ws memdb.WatchSet, namespace, deploymentID string) (memdb.ResultIterator, error) {
	txn := s.db.ReadTxn()

	// Get an iterator over deployments with a matching ID prefix
	iter, err := txn.Get("deployment", "id_prefix", deploymentID)
	if err != nil {
		return nil, err
	}

	ws.Add(iter.WatchCh())

	// Wrap the iterator in a filter
	wrap := memdb.NewFilterIterator(iter, deploymentNamespaceFilter(namespace))
	return wrap, nil
}
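// Example (editorial sketch): resolving a short, user-supplied deployment ID
// prefix within a namespace; the prefix value is an assumption for
// illustration. The returned iterator only yields deployments in the given
// namespace because of the filter wrapper above.
//
//	ws := store.NewWatchSet()
//	iter, err := store.DeploymentsByIDPrefix(ws, structs.DefaultNamespace, "c0ffee")
//	if err != nil {
//		// handle lookup error
//	}
//	for raw := iter.Next(); raw != nil; raw = iter.Next() {
//		d := raw.(*structs.Deployment) // already namespace-filtered
//		_ = d
//	}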
// deploymentNamespaceFilter returns a filter function that filters out all
// deployments that are not in the given namespace.
func deploymentNamespaceFilter(namespace string) func(interface{}) bool {
	return func(raw interface{}) bool {
		d, ok := raw.(*structs.Deployment)
		if !ok {
			return true
		}

		return d.Namespace != namespace
	}
}

func (s *StateStore) DeploymentByID(ws memdb.WatchSet, deploymentID string) (*structs.Deployment, error) {
	txn := s.db.ReadTxn()
	return s.deploymentByIDImpl(ws, deploymentID, txn)
}

func (s *StateStore) deploymentByIDImpl(ws memdb.WatchSet, deploymentID string, txn *txn) (*structs.Deployment, error) {
	watchCh, existing, err := txn.FirstWatch("deployment", "id", deploymentID)
	if err != nil {
		return nil, fmt.Errorf("deployment lookup failed: %v", err)
	}
	ws.Add(watchCh)

	if existing != nil {
		return existing.(*structs.Deployment), nil
	}

	return nil, nil
}

func (s *StateStore) DeploymentsByJobID(ws memdb.WatchSet, namespace, jobID string, all bool) ([]*structs.Deployment, error) {
	txn := s.db.ReadTxn()

	var job *structs.Job
	// Read job from state store
	_, existing, err := txn.FirstWatch("jobs", "id", namespace, jobID)
	if err != nil {
		return nil, fmt.Errorf("job lookup failed: %v", err)
	}
	if existing != nil {
		job = existing.(*structs.Job)
	}

	// Get an iterator over the deployments
	iter, err := txn.Get("deployment", "job", namespace, jobID)
	if err != nil {
		return nil, err
	}

	ws.Add(iter.WatchCh())

	var out []*structs.Deployment
	for {
		raw := iter.Next()
		if raw == nil {
			break
		}
		d := raw.(*structs.Deployment)

		// If the deployment belongs to a job with the same ID but a different
		// create index, it belongs to a previous instance of the job; skip it
		// unless the caller asked for all deployments matching the job ID.
		if !all && job != nil && d.JobCreateIndex != job.CreateIndex {
			continue
		}
		out = append(out, d)
	}

	return out, nil
}
// LatestDeploymentByJobID returns the latest deployment for the given job. The
// latest is determined strictly by CreateIndex.
func (s *StateStore) LatestDeploymentByJobID(ws memdb.WatchSet, namespace, jobID string) (*structs.Deployment, error) {
	txn := s.db.ReadTxn()

	// Get an iterator over the deployments
	iter, err := txn.Get("deployment", "job", namespace, jobID)
	if err != nil {
		return nil, err
	}

	ws.Add(iter.WatchCh())

	var out *structs.Deployment
	for {
		raw := iter.Next()
		if raw == nil {
			break
		}

		d := raw.(*structs.Deployment)
		if out == nil || out.CreateIndex < d.CreateIndex {
			out = d
		}
	}

	return out, nil
}

// DeleteDeployment is used to delete a set of deployments by ID
func (s *StateStore) DeleteDeployment(index uint64, deploymentIDs []string) error {
	txn := s.db.WriteTxn(index)
	defer txn.Abort()

	if len(deploymentIDs) == 0 {
		return nil
	}

	for _, deploymentID := range deploymentIDs {
		// Lookup the deployment
		existing, err := txn.First("deployment", "id", deploymentID)
		if err != nil {
			return fmt.Errorf("deployment lookup failed: %v", err)
		}
		if existing == nil {
			return fmt.Errorf("deployment not found")
		}

		// Delete the deployment
		if err := txn.Delete("deployment", existing); err != nil {
			return fmt.Errorf("deployment delete failed: %v", err)
		}
	}

	if err := txn.Insert("index", &IndexEntry{"deployment", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}

	return txn.Commit()
}

// UpsertScalingEvent is used to insert a new scaling event.
// Only the most recent JobTrackedScalingEvents events are kept per task group.
func (s *StateStore) UpsertScalingEvent(index uint64, req *structs.ScalingEventRequest) error {
	txn := s.db.WriteTxn(index)
	defer txn.Abort()

	// Get the existing events
	existing, err := txn.First("scaling_event", "id", req.Namespace, req.JobID)
	if err != nil {
		return fmt.Errorf("scaling event lookup failed: %v", err)
	}

	var jobEvents *structs.JobScalingEvents
	if existing != nil {
		jobEvents = existing.(*structs.JobScalingEvents)
	} else {
		jobEvents = &structs.JobScalingEvents{
			Namespace:     req.Namespace,
			JobID:         req.JobID,
			ScalingEvents: make(map[string][]*structs.ScalingEvent),
		}
	}

	jobEvents.ModifyIndex = index
	req.ScalingEvent.CreateIndex = index

	events := jobEvents.ScalingEvents[req.TaskGroup]
	// Prepend this latest event
	events = append(
		[]*structs.ScalingEvent{req.ScalingEvent},
		events...,
	)
	// Truncate older events
	if len(events) > structs.JobTrackedScalingEvents {
		events = events[0:structs.JobTrackedScalingEvents]
	}
	jobEvents.ScalingEvents[req.TaskGroup] = events

	// Insert the new event
	if err := txn.Insert("scaling_event", jobEvents); err != nil {
		return fmt.Errorf("scaling event insert failed: %v", err)
	}

	// Update the indexes table for scaling_event
	if err := txn.Insert("index", &IndexEntry{"scaling_event", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}

	return txn.Commit()
}
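// Editorial note: because events are prepended and then truncated, the most
// recent event is always at position 0. For example, if
// structs.JobTrackedScalingEvents were 5, upserting events e1..e6 for one
// task group would leave:
//
//	jobEvents.ScalingEvents["web"] // == [e6 e5 e4 e3 e2]; e1 has been dropped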
// ScalingEvents returns an iterator over all the job scaling events
func (s *StateStore) ScalingEvents(ws memdb.WatchSet) (memdb.ResultIterator, error) {
	txn := s.db.ReadTxn()

	// Walk the entire scaling_event table
	iter, err := txn.Get("scaling_event", "id")
	if err != nil {
		return nil, err
	}

	ws.Add(iter.WatchCh())

	return iter, nil
}

func (s *StateStore) ScalingEventsByJob(ws memdb.WatchSet, namespace, jobID string) (map[string][]*structs.ScalingEvent, uint64, error) {
	txn := s.db.ReadTxn()

	watchCh, existing, err := txn.FirstWatch("scaling_event", "id", namespace, jobID)
	if err != nil {
		return nil, 0, fmt.Errorf("job scaling events lookup failed: %v", err)
	}
	ws.Add(watchCh)

	if existing != nil {
		events := existing.(*structs.JobScalingEvents)
		return events.ScalingEvents, events.ModifyIndex, nil
	}
	return nil, 0, nil
}

// UpsertNode is used to register a node or update a node definition.
// This is assumed to be triggered by the client, so we retain the value
// of drain/eligibility which is set by the scheduler.
func (s *StateStore) UpsertNode(msgType structs.MessageType, index uint64, node *structs.Node) error {
	txn := s.db.WriteTxnMsgT(msgType, index)
	defer txn.Abort()

	err := upsertNodeTxn(txn, index, node)
	if err != nil {
		return err
	}
	return txn.Commit()
}
func upsertNodeTxn(txn *txn, index uint64, node *structs.Node) error {
	// Check if the node already exists
	existing, err := txn.First("nodes", "id", node.ID)
	if err != nil {
		return fmt.Errorf("node lookup failed: %v", err)
	}

	// Setup the indexes correctly
	if existing != nil {
		exist := existing.(*structs.Node)
		node.CreateIndex = exist.CreateIndex
		node.ModifyIndex = index

		// Retain node events that have already been set on the node
		node.Events = exist.Events

		// If we are transitioning from down, record the re-registration
		if exist.Status == structs.NodeStatusDown && node.Status != structs.NodeStatusDown {
			appendNodeEvents(index, node, []*structs.NodeEvent{
				structs.NewNodeEvent().SetSubsystem(structs.NodeEventSubsystemCluster).
					SetMessage(NodeRegisterEventReregistered).
					SetTimestamp(time.Unix(node.StatusUpdatedAt, 0))})
		}

		node.Drain = exist.Drain                                 // Retain the drain mode
		node.SchedulingEligibility = exist.SchedulingEligibility // Retain the eligibility
		node.DrainStrategy = exist.DrainStrategy                 // Retain the drain strategy
	} else {
		// Because this is the first time the node is being registered, we should
		// also create a node registration event
		nodeEvent := structs.NewNodeEvent().SetSubsystem(structs.NodeEventSubsystemCluster).
			SetMessage(NodeRegisterEventRegistered).
			SetTimestamp(time.Unix(node.StatusUpdatedAt, 0))
		node.Events = []*structs.NodeEvent{nodeEvent}
		node.CreateIndex = index
		node.ModifyIndex = index
	}

	// Insert the node
	if err := txn.Insert("nodes", node); err != nil {
		return fmt.Errorf("node insert failed: %v", err)
	}
	if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}
	if err := upsertNodeCSIPlugins(txn, node, index); err != nil {
		return fmt.Errorf("csi plugin update failed: %v", err)
	}

	return nil
}

// DeleteNode deregisters a batch of nodes
func (s *StateStore) DeleteNode(msgType structs.MessageType, index uint64, nodes []string) error {
	txn := s.db.WriteTxn(index)
	defer txn.Abort()

	err := deleteNodeTxn(txn, index, nodes)
	if err != nil {
		return err
	}
	return txn.Commit()
}

func deleteNodeTxn(txn *txn, index uint64, nodes []string) error {
	if len(nodes) == 0 {
		return fmt.Errorf("node ids missing")
	}

	for _, nodeID := range nodes {
		existing, err := txn.First("nodes", "id", nodeID)
		if err != nil {
			return fmt.Errorf("node lookup failed: %s: %v", nodeID, err)
		}
		if existing == nil {
			return fmt.Errorf("node not found: %s", nodeID)
		}

		// Delete the node
		if err := txn.Delete("nodes", existing); err != nil {
			return fmt.Errorf("node delete failed: %s: %v", nodeID, err)
		}

		node := existing.(*structs.Node)
		if err := deleteNodeCSIPlugins(txn, node, index); err != nil {
			return fmt.Errorf("csi plugin delete failed: %v", err)
		}
	}

	if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}

	return nil
}

// UpdateNodeStatus is used to update the status of a node
func (s *StateStore) UpdateNodeStatus(msgType structs.MessageType, index uint64, nodeID, status string, updatedAt int64, event *structs.NodeEvent) error {
	txn := s.db.WriteTxnMsgT(msgType, index)
	defer txn.Abort()

	if err := s.updateNodeStatusTxn(txn, nodeID, status, updatedAt, event); err != nil {
		return err
	}

	return txn.Commit()
}

func (s *StateStore) updateNodeStatusTxn(txn *txn, nodeID, status string, updatedAt int64, event *structs.NodeEvent) error {
	// Lookup the node
	existing, err := txn.First("nodes", "id", nodeID)
	if err != nil {
		return fmt.Errorf("node lookup failed: %v", err)
	}
	if existing == nil {
		return fmt.Errorf("node not found")
	}

	// Copy the existing node
	existingNode := existing.(*structs.Node)
	copyNode := existingNode.Copy()
	copyNode.StatusUpdatedAt = updatedAt

	// Add the event if given
	if event != nil {
		appendNodeEvents(txn.Index, copyNode, []*structs.NodeEvent{event})
	}

	// Update the status in the copy
	copyNode.Status = status
	copyNode.ModifyIndex = txn.Index

	// Insert the node
	if err := txn.Insert("nodes", copyNode); err != nil {
		return fmt.Errorf("node update failed: %v", err)
	}
	if err := txn.Insert("index", &IndexEntry{"nodes", txn.Index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}
	return nil
}
// BatchUpdateNodeDrain is used to update the drain of a set of nodes
func (s *StateStore) BatchUpdateNodeDrain(msgType structs.MessageType, index uint64, updatedAt int64, updates map[string]*structs.DrainUpdate, events map[string]*structs.NodeEvent) error {
	txn := s.db.WriteTxnMsgT(msgType, index)
	defer txn.Abort()
	for node, update := range updates {
		if err := s.updateNodeDrainImpl(txn, index, node, update.DrainStrategy, update.MarkEligible, updatedAt, events[node]); err != nil {
			return err
		}
	}
	return txn.Commit()
}

// UpdateNodeDrain is used to update the drain of a node
func (s *StateStore) UpdateNodeDrain(msgType structs.MessageType, index uint64, nodeID string, drain *structs.DrainStrategy, markEligible bool, updatedAt int64, event *structs.NodeEvent) error {

	txn := s.db.WriteTxn(index)
	defer txn.Abort()
	if err := s.updateNodeDrainImpl(txn, index, nodeID, drain, markEligible, updatedAt, event); err != nil {
		return err
	}
	return txn.Commit()
}

func (s *StateStore) updateNodeDrainImpl(txn *txn, index uint64, nodeID string,
	drain *structs.DrainStrategy, markEligible bool, updatedAt int64, event *structs.NodeEvent) error {

	// Lookup the node
	existing, err := txn.First("nodes", "id", nodeID)
	if err != nil {
		return fmt.Errorf("node lookup failed: %v", err)
	}
	if existing == nil {
		return fmt.Errorf("node not found")
	}

	// Copy the existing node
	existingNode := existing.(*structs.Node)
	copyNode := existingNode.Copy()
	copyNode.StatusUpdatedAt = updatedAt

	// Add the event if given
	if event != nil {
		appendNodeEvents(index, copyNode, []*structs.NodeEvent{event})
	}

	// Update the drain in the copy
	copyNode.Drain = drain != nil // COMPAT: Remove in Nomad 0.10
	copyNode.DrainStrategy = drain
	if drain != nil {
		copyNode.SchedulingEligibility = structs.NodeSchedulingIneligible
	} else if markEligible {
		copyNode.SchedulingEligibility = structs.NodeSchedulingEligible
	}

	copyNode.ModifyIndex = index

	// Insert the node
	if err := txn.Insert("nodes", copyNode); err != nil {
		return fmt.Errorf("node update failed: %v", err)
	}
	if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}

	return nil
}
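// Example (editorial sketch): how drain updates interact with scheduling
// eligibility; the indexes, node ID, and timestamp are assumptions for
// illustration.
//
//	// Starting a drain always marks the node ineligible:
//	err := store.UpdateNodeDrain(msgType, 200, nodeID, &structs.DrainStrategy{}, false, now, nil)
//
//	// Clearing the drain with markEligible=true opts the node back in:
//	err = store.UpdateNodeDrain(msgType, 201, nodeID, nil, true, now, nil)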
err := txn.Insert("nodes", copyNode); err != nil { 1057 return fmt.Errorf("node update failed: %v", err) 1058 } 1059 if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil { 1060 return fmt.Errorf("index update failed: %v", err) 1061 } 1062 1063 return txn.Commit() 1064 } 1065 1066 // UpsertNodeEvents adds the node events to the nodes, rotating events as 1067 // necessary. 1068 func (s *StateStore) UpsertNodeEvents(msgType structs.MessageType, index uint64, nodeEvents map[string][]*structs.NodeEvent) error { 1069 txn := s.db.WriteTxnMsgT(msgType, index) 1070 defer txn.Abort() 1071 1072 for nodeID, events := range nodeEvents { 1073 if err := s.upsertNodeEvents(index, nodeID, events, txn); err != nil { 1074 return err 1075 } 1076 } 1077 1078 return txn.Commit() 1079 } 1080 1081 // upsertNodeEvent upserts a node event for a respective node. It also maintains 1082 // that a fixed number of node events are ever stored simultaneously, deleting 1083 // older events once this bound has been reached. 1084 func (s *StateStore) upsertNodeEvents(index uint64, nodeID string, events []*structs.NodeEvent, txn *txn) error { 1085 // Lookup the node 1086 existing, err := txn.First("nodes", "id", nodeID) 1087 if err != nil { 1088 return fmt.Errorf("node lookup failed: %v", err) 1089 } 1090 if existing == nil { 1091 return fmt.Errorf("node not found") 1092 } 1093 1094 // Copy the existing node 1095 existingNode := existing.(*structs.Node) 1096 copyNode := existingNode.Copy() 1097 appendNodeEvents(index, copyNode, events) 1098 1099 // Insert the node 1100 if err := txn.Insert("nodes", copyNode); err != nil { 1101 return fmt.Errorf("node update failed: %v", err) 1102 } 1103 if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil { 1104 return fmt.Errorf("index update failed: %v", err) 1105 } 1106 1107 return nil 1108 } 1109 1110 // appendNodeEvents is a helper that takes a node and new events and appends 1111 // them, pruning older events as needed. 1112 func appendNodeEvents(index uint64, node *structs.Node, events []*structs.NodeEvent) { 1113 // Add the events, updating the indexes 1114 for _, e := range events { 1115 e.CreateIndex = index 1116 node.Events = append(node.Events, e) 1117 } 1118 1119 // Keep node events pruned to not exceed the max allowed 1120 if l := len(node.Events); l > structs.MaxRetainedNodeEvents { 1121 delta := l - structs.MaxRetainedNodeEvents 1122 node.Events = node.Events[delta:] 1123 } 1124 } 1125 1126 // upsertNodeCSIPlugins indexes csi plugins for volume retrieval, with health. 
// upsertNodeCSIPlugins indexes csi plugins for volume retrieval, with health.
// It's called from upsertNodeTxn, so that event-driven health changes are
// updated.
func upsertNodeCSIPlugins(txn *txn, node *structs.Node, index uint64) error {

	loop := func(info *structs.CSIInfo) error {
		raw, err := txn.First("csi_plugins", "id", info.PluginID)
		if err != nil {
			return fmt.Errorf("csi_plugin lookup error: %s %v", info.PluginID, err)
		}

		var plug *structs.CSIPlugin
		if raw != nil {
			plug = raw.(*structs.CSIPlugin).Copy()
		} else {
			if !info.Healthy {
				// we don't want to create new plugins for unhealthy
				// allocs, otherwise we'd recreate the plugin when we
				// get the update for the alloc becoming terminal
				return nil
			}
			plug = structs.NewCSIPlugin(info.PluginID, index)
		}

		// the plugin may have been created by the job being updated, in which case
		// this data will not be configured, it's only available to the fingerprint
		// system
		plug.Provider = info.Provider
		plug.Version = info.ProviderVersion

		err = plug.AddPlugin(node.ID, info)
		if err != nil {
			return err
		}

		plug.ModifyIndex = index

		err = txn.Insert("csi_plugins", plug)
		if err != nil {
			return fmt.Errorf("csi_plugins insert error: %v", err)
		}

		return nil
	}

	inUseController := map[string]struct{}{}
	inUseNode := map[string]struct{}{}

	for _, info := range node.CSIControllerPlugins {
		err := loop(info)
		if err != nil {
			return err
		}
		inUseController[info.PluginID] = struct{}{}
	}

	for _, info := range node.CSINodePlugins {
		err := loop(info)
		if err != nil {
			return err
		}
		inUseNode[info.PluginID] = struct{}{}
	}

	// remove the client node from any plugin that's not
	// running on it.
	iter, err := txn.Get("csi_plugins", "id")
	if err != nil {
		return fmt.Errorf("csi_plugins lookup failed: %v", err)
	}
	for {
		raw := iter.Next()
		if raw == nil {
			break
		}
		plug, ok := raw.(*structs.CSIPlugin)
		if !ok {
			continue
		}
		plug = plug.Copy()

		var hadDelete bool
		if _, ok := inUseController[plug.ID]; !ok {
			if _, asController := plug.Controllers[node.ID]; asController {
				err := plug.DeleteNodeForType(node.ID, structs.CSIPluginTypeController)
				if err != nil {
					return err
				}
				hadDelete = true
			}
		}
		if _, ok := inUseNode[plug.ID]; !ok {
			if _, asNode := plug.Nodes[node.ID]; asNode {
				err := plug.DeleteNodeForType(node.ID, structs.CSIPluginTypeNode)
				if err != nil {
					return err
				}
				hadDelete = true
			}
		}
		// we check this flag both for performance and to make sure we
		// don't delete a plugin when registering a node plugin but
		// no controller
		if hadDelete {
			err = updateOrGCPlugin(index, txn, plug)
			if err != nil {
				return err
			}
		}
	}

	if err := txn.Insert("index", &IndexEntry{"csi_plugins", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}

	return nil
}

// deleteNodeCSIPlugins cleans up CSIInfo node health status, called in DeleteNode
func deleteNodeCSIPlugins(txn *txn, node *structs.Node, index uint64) error {
	if len(node.CSIControllerPlugins) == 0 && len(node.CSINodePlugins) == 0 {
		return nil
	}

	names := map[string]struct{}{}
	for _, info := range node.CSIControllerPlugins {
		names[info.PluginID] = struct{}{}
	}
	for _, info := range node.CSINodePlugins {
		names[info.PluginID] = struct{}{}
	}

	for id := range names {
		raw, err := txn.First("csi_plugins", "id", id)
		if err != nil {
			return fmt.Errorf("csi_plugins lookup error %s: %v", id, err)
		}
		if raw == nil {
			// plugin may have been deregistered but we didn't
			// update the fingerprint yet
			continue
		}

		plug := raw.(*structs.CSIPlugin).Copy()
		err = plug.DeleteNode(node.ID)
		if err != nil {
			return err
		}
		err = updateOrGCPlugin(index, txn, plug)
		if err != nil {
			return err
		}
	}

	if err := txn.Insert("index", &IndexEntry{"csi_plugins", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}

	return nil
}

// updateOrGCPlugin updates a plugin but will delete it if the plugin is empty
func updateOrGCPlugin(index uint64, txn Txn, plug *structs.CSIPlugin) error {
	plug.ModifyIndex = index

	if plug.IsEmpty() {
		err := txn.Delete("csi_plugins", plug)
		if err != nil {
			return fmt.Errorf("csi_plugins delete error: %v", err)
		}
	} else {
		err := txn.Insert("csi_plugins", plug)
		if err != nil {
			return fmt.Errorf("csi_plugins update error %s: %v", plug.ID, err)
		}
	}
	return nil
}
// deleteJobFromPlugins removes the allocations of this job from any plugins
// the job is running, possibly deleting the plugin if it's no longer in use.
// It's called in DeleteJobTxn.
func (s *StateStore) deleteJobFromPlugins(index uint64, txn Txn, job *structs.Job) error {
	ws := memdb.NewWatchSet()
	summary, err := s.JobSummaryByID(ws, job.Namespace, job.ID)
	if err != nil {
		return fmt.Errorf("error getting job summary: %v", err)
	}

	allocs, err := s.AllocsByJob(ws, job.Namespace, job.ID, false)
	if err != nil {
		return fmt.Errorf("error getting allocations: %v", err)
	}

	type pair struct {
		pluginID string
		alloc    *structs.Allocation
	}

	plugAllocs := []*pair{}
	found := map[string]struct{}{}

	// Find plugins for allocs that belong to this job
	for _, a := range allocs {
		tg := a.Job.LookupTaskGroup(a.TaskGroup)
		found[tg.Name] = struct{}{}
		for _, t := range tg.Tasks {
			if t.CSIPluginConfig == nil {
				continue
			}
			plugAllocs = append(plugAllocs, &pair{
				pluginID: t.CSIPluginConfig.ID,
				alloc:    a,
			})
		}
	}

	// Find any plugins that do not yet have allocs for this job
	for _, tg := range job.TaskGroups {
		if _, ok := found[tg.Name]; ok {
			continue
		}

		for _, t := range tg.Tasks {
			if t.CSIPluginConfig == nil {
				continue
			}
			plugAllocs = append(plugAllocs, &pair{
				pluginID: t.CSIPluginConfig.ID,
			})
		}
	}

	plugins := map[string]*structs.CSIPlugin{}

	for _, x := range plugAllocs {
		plug, ok := plugins[x.pluginID]

		if !ok {
			plug, err = s.CSIPluginByIDTxn(txn, nil, x.pluginID)
			if err != nil {
				return fmt.Errorf("error getting plugin: %s, %v", x.pluginID, err)
			}
			if plug == nil {
				return fmt.Errorf("plugin missing: %s", x.pluginID)
			}
			// only copy once, so we update the same plugin on each alloc
			plugins[x.pluginID] = plug.Copy()
			plug = plugins[x.pluginID]
		}

		if x.alloc == nil {
			continue
		}
		err := plug.DeleteAlloc(x.alloc.ID, x.alloc.NodeID)
		if err != nil {
			return err
		}
	}

	for _, plug := range plugins {
		plug.DeleteJob(job, summary)
		err = updateOrGCPlugin(index, txn, plug)
		if err != nil {
			return err
		}
	}

	if err = txn.Insert("index", &IndexEntry{"csi_plugins", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}

	return nil
}

// NodeByID is used to lookup a node by ID
func (s *StateStore) NodeByID(ws memdb.WatchSet, nodeID string) (*structs.Node, error) {
	txn := s.db.ReadTxn()

	watchCh, existing, err := txn.FirstWatch("nodes", "id", nodeID)
	if err != nil {
		return nil, fmt.Errorf("node lookup failed: %v", err)
	}
	ws.Add(watchCh)

	if existing != nil {
		return existing.(*structs.Node), nil
	}
	return nil, nil
}

// NodesByIDPrefix is used to lookup nodes by prefix
func (s *StateStore) NodesByIDPrefix(ws memdb.WatchSet, nodeID string) (memdb.ResultIterator, error) {
	txn := s.db.ReadTxn()

	iter, err := txn.Get("nodes", "id_prefix", nodeID)
	if err != nil {
		return nil, fmt.Errorf("node lookup failed: %v", err)
	}
	ws.Add(iter.WatchCh())

	return iter, nil
}
txn.FirstWatch("nodes", "secret_id", secretID) 1433 if err != nil { 1434 return nil, fmt.Errorf("node lookup by SecretID failed: %v", err) 1435 } 1436 ws.Add(watchCh) 1437 1438 if existing != nil { 1439 return existing.(*structs.Node), nil 1440 } 1441 return nil, nil 1442 } 1443 1444 // Nodes returns an iterator over all the nodes 1445 func (s *StateStore) Nodes(ws memdb.WatchSet) (memdb.ResultIterator, error) { 1446 txn := s.db.ReadTxn() 1447 1448 // Walk the entire nodes table 1449 iter, err := txn.Get("nodes", "id") 1450 if err != nil { 1451 return nil, err 1452 } 1453 ws.Add(iter.WatchCh()) 1454 return iter, nil 1455 } 1456 1457 // UpsertJob is used to register a job or update a job definition 1458 func (s *StateStore) UpsertJob(msgType structs.MessageType, index uint64, job *structs.Job) error { 1459 txn := s.db.WriteTxnMsgT(msgType, index) 1460 defer txn.Abort() 1461 if err := s.upsertJobImpl(index, job, false, txn); err != nil { 1462 return err 1463 } 1464 return txn.Commit() 1465 } 1466 1467 // UpsertJobTxn is used to register a job or update a job definition, like UpsertJob, 1468 // but in a transaction. Useful for when making multiple modifications atomically 1469 func (s *StateStore) UpsertJobTxn(index uint64, job *structs.Job, txn Txn) error { 1470 return s.upsertJobImpl(index, job, false, txn) 1471 } 1472 1473 // upsertJobImpl is the implementation for registering a job or updating a job definition 1474 func (s *StateStore) upsertJobImpl(index uint64, job *structs.Job, keepVersion bool, txn *txn) error { 1475 // Assert the namespace exists 1476 if exists, err := s.namespaceExists(txn, job.Namespace); err != nil { 1477 return err 1478 } else if !exists { 1479 return fmt.Errorf("job %q is in nonexistent namespace %q", job.ID, job.Namespace) 1480 } 1481 1482 // Check if the job already exists 1483 existing, err := txn.First("jobs", "id", job.Namespace, job.ID) 1484 var existingJob *structs.Job 1485 if err != nil { 1486 return fmt.Errorf("job lookup failed: %v", err) 1487 } 1488 1489 // Setup the indexes correctly 1490 if existing != nil { 1491 job.CreateIndex = existing.(*structs.Job).CreateIndex 1492 job.ModifyIndex = index 1493 1494 existingJob = existing.(*structs.Job) 1495 1496 // Bump the version unless asked to keep it. This should only be done 1497 // when changing an internal field such as Stable. 
// upsertJobImpl is the implementation for registering a job or updating a job definition
func (s *StateStore) upsertJobImpl(index uint64, job *structs.Job, keepVersion bool, txn *txn) error {
	// Assert the namespace exists
	if exists, err := s.namespaceExists(txn, job.Namespace); err != nil {
		return err
	} else if !exists {
		return fmt.Errorf("job %q is in nonexistent namespace %q", job.ID, job.Namespace)
	}

	// Check if the job already exists
	existing, err := txn.First("jobs", "id", job.Namespace, job.ID)
	var existingJob *structs.Job
	if err != nil {
		return fmt.Errorf("job lookup failed: %v", err)
	}

	// Setup the indexes correctly
	if existing != nil {
		job.CreateIndex = existing.(*structs.Job).CreateIndex
		job.ModifyIndex = index

		existingJob = existing.(*structs.Job)

		// Bump the version unless asked to keep it. This should only be done
		// when changing an internal field such as Stable. A spec change should
		// always come with a version bump
		if !keepVersion {
			job.JobModifyIndex = index
			if job.Version <= existingJob.Version {
				job.Version = existingJob.Version + 1
			}
		}

		// Compute the job status
		var err error
		job.Status, err = s.getJobStatus(txn, job, false)
		if err != nil {
			return fmt.Errorf("setting job status for %q failed: %v", job.ID, err)
		}
	} else {
		job.CreateIndex = index
		job.ModifyIndex = index
		job.JobModifyIndex = index

		if err := s.setJobStatus(index, txn, job, false, ""); err != nil {
			return fmt.Errorf("setting job status for %q failed: %v", job.ID, err)
		}

		// Have to get the job again since it could have been updated
		updated, err := txn.First("jobs", "id", job.Namespace, job.ID)
		if err != nil {
			return fmt.Errorf("job lookup failed: %v", err)
		}
		if updated != nil {
			job = updated.(*structs.Job)
		}
	}

	if err := s.updateSummaryWithJob(index, job, txn); err != nil {
		return fmt.Errorf("unable to create job summary: %v", err)
	}

	if err := s.upsertJobVersion(index, job, txn); err != nil {
		return fmt.Errorf("unable to upsert job into job_version table: %v", err)
	}

	if err := s.updateJobScalingPolicies(index, job, txn); err != nil {
		return fmt.Errorf("unable to update job scaling policies: %v", err)
	}

	if err := s.updateJobRecommendations(index, txn, existingJob, job); err != nil {
		return fmt.Errorf("unable to update job recommendations: %v", err)
	}

	if err := s.updateJobCSIPlugins(index, job, existingJob, txn); err != nil {
		return fmt.Errorf("unable to update job csi plugins: %v", err)
	}

	// Insert the job
	if err := txn.Insert("jobs", job); err != nil {
		return fmt.Errorf("job insert failed: %v", err)
	}
	if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}

	return nil
}

// DeleteJob is used to deregister a job
func (s *StateStore) DeleteJob(index uint64, namespace, jobID string) error {
	txn := s.db.WriteTxn(index)
	defer txn.Abort()

	err := s.DeleteJobTxn(index, namespace, jobID, txn)
	if err == nil {
		return txn.Commit()
	}
	return err
}
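// Example (editorial sketch): composing several job mutations into one
// atomic write using the Txn variants. WriteTxn is internal to this package;
// the index and job values are assumptions for illustration.
//
//	txn := s.db.WriteTxn(index)
//	defer txn.Abort()
//	if err := s.UpsertJobTxn(index, newJob, txn); err != nil {
//		return err
//	}
//	if err := s.DeleteJobTxn(index, oldJob.Namespace, oldJob.ID, txn); err != nil {
//		return err
//	}
//	return txn.Commit() // both changes become visible together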
// DeleteJobTxn is used to deregister a job, like DeleteJob,
// but in a transaction. Useful when making multiple modifications atomically.
func (s *StateStore) DeleteJobTxn(index uint64, namespace, jobID string, txn Txn) error {
	// Lookup the job
	existing, err := txn.First("jobs", "id", namespace, jobID)
	if err != nil {
		return fmt.Errorf("job lookup failed: %v", err)
	}
	if existing == nil {
		return fmt.Errorf("job not found")
	}

	// Check if we should update a parent job summary
	job := existing.(*structs.Job)
	if job.ParentID != "" {
		summaryRaw, err := txn.First("job_summary", "id", namespace, job.ParentID)
		if err != nil {
			return fmt.Errorf("unable to retrieve summary for parent job: %v", err)
		}

		// Only continue if the summary exists. It could not exist if the
		// parent job was removed
		if summaryRaw != nil {
			existing := summaryRaw.(*structs.JobSummary)
			pSummary := existing.Copy()
			if pSummary.Children != nil {

				modified := false
				switch job.Status {
				case structs.JobStatusPending:
					pSummary.Children.Pending--
					pSummary.Children.Dead++
					modified = true
				case structs.JobStatusRunning:
					pSummary.Children.Running--
					pSummary.Children.Dead++
					modified = true
				case structs.JobStatusDead:
				default:
					return fmt.Errorf("unknown old job status %q", job.Status)
				}

				if modified {
					// Update the modify index
					pSummary.ModifyIndex = index

					// Insert the summary
					if err := txn.Insert("job_summary", pSummary); err != nil {
						return fmt.Errorf("job summary insert failed: %v", err)
					}
					if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
						return fmt.Errorf("index update failed: %v", err)
					}
				}
			}
		}
	}

	// Delete the job
	if err := txn.Delete("jobs", existing); err != nil {
		return fmt.Errorf("job delete failed: %v", err)
	}
	if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}

	// Delete the job versions
	if err := s.deleteJobVersions(index, job, txn); err != nil {
		return err
	}

	// Cleanup plugins registered by this job, before we delete the summary
	err = s.deleteJobFromPlugins(index, txn, job)
	if err != nil {
		return fmt.Errorf("deleting job from plugin: %v", err)
	}

	// Delete the job summary
	if _, err = txn.DeleteAll("job_summary", "id", namespace, jobID); err != nil {
		return fmt.Errorf("deleting job summary failed: %v", err)
	}
	if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}

	// Delete any remaining job scaling policies
	if err := s.deleteJobScalingPolicies(index, job, txn); err != nil {
		return fmt.Errorf("deleting job scaling policies failed: %v", err)
	}

	// Delete any job recommendations
	if err := s.deleteRecommendationsByJob(index, txn, job); err != nil {
		return fmt.Errorf("deleting job recommendations failed: %v", err)
	}

	// Delete the scaling events
	if _, err = txn.DeleteAll("scaling_event", "id", namespace, jobID); err != nil {
		return fmt.Errorf("deleting job scaling events failed: %v", err)
	}
	if err := txn.Insert("index", &IndexEntry{"scaling_event", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}

	return nil
}
// deleteJobScalingPolicies deletes any scaling policies associated with the job
func (s *StateStore) deleteJobScalingPolicies(index uint64, job *structs.Job, txn *txn) error {
	iter, err := s.ScalingPoliciesByJobTxn(nil, job.Namespace, job.ID, txn)
	if err != nil {
		return fmt.Errorf("getting job scaling policies for deletion failed: %v", err)
	}

	// Put them into a slice so there are no safety concerns while actually
	// performing the deletes
	policies := []interface{}{}
	for {
		raw := iter.Next()
		if raw == nil {
			break
		}
		policies = append(policies, raw)
	}

	// Do the deletes
	for _, p := range policies {
		if err := txn.Delete("scaling_policy", p); err != nil {
			return fmt.Errorf("deleting scaling policy failed: %v", err)
		}
	}

	if len(policies) > 0 {
		if err := txn.Insert("index", &IndexEntry{"scaling_policy", index}); err != nil {
			return fmt.Errorf("index update failed: %v", err)
		}
	}
	return nil
}

// deleteJobVersions deletes all versions of the given job.
func (s *StateStore) deleteJobVersions(index uint64, job *structs.Job, txn *txn) error {
	iter, err := txn.Get("job_version", "id_prefix", job.Namespace, job.ID)
	if err != nil {
		return err
	}

	// Put them into a slice so there are no safety concerns while actually
	// performing the deletes
	jobs := []*structs.Job{}
	for {
		raw := iter.Next()
		if raw == nil {
			break
		}

		// Ensure the ID is an exact match
		j := raw.(*structs.Job)
		if j.ID != job.ID {
			continue
		}

		jobs = append(jobs, j)
	}

	// Do the deletes
	for _, j := range jobs {
		if err := txn.Delete("job_version", j); err != nil {
			return fmt.Errorf("deleting job versions failed: %v", err)
		}
	}

	if err := txn.Insert("index", &IndexEntry{"job_version", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}

	return nil
}

// upsertJobVersion inserts a job into its historic version table and limits the
// number of job versions that are tracked.
func (s *StateStore) upsertJobVersion(index uint64, job *structs.Job, txn *txn) error {
	// Insert the job
	if err := txn.Insert("job_version", job); err != nil {
		return fmt.Errorf("failed to insert job into job_version table: %v", err)
	}

	if err := txn.Insert("index", &IndexEntry{"job_version", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}

	// Get all the historic jobs for this ID
	all, err := s.jobVersionByID(txn, nil, job.Namespace, job.ID)
	if err != nil {
		return fmt.Errorf("failed to look up job versions for %q: %v", job.ID, err)
	}

	// If we are below the limit there is no GCing to be done
	if len(all) <= structs.JobTrackedVersions {
		return nil
	}

	// We have to delete a historic job to make room.
	// Find the index of the highest versioned stable job
	stableIdx := -1
	for i, j := range all {
		if j.Stable {
			stableIdx = i
			break
		}
	}

	// If the stable job is the oldest version, do a swap to bring it into the
	// keep set.
	max := structs.JobTrackedVersions
	if stableIdx == max {
		all[max-1], all[max] = all[max], all[max-1]
	}

	// Delete the job outside of the set that is being kept.
	d := all[max]
	if err := txn.Delete("job_version", d); err != nil {
		return fmt.Errorf("failed to delete job %v (%d) from job_version: %v", d.ID, d.Version, err)
	}

	return nil
}
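// Editorial note: a worked example of the retention logic above. With
// structs.JobTrackedVersions == 6 (say), all is sorted newest-first, so
// all[0..5] are kept and all[6] is deleted. If the highest-versioned stable
// job happens to be exactly all[6], it is first swapped with all[5] so the
// stable version stays tracked and the unstable one is garbage collected
// instead.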
JobByIDTxn returns the job version 1809 // accessible within the passed transaction 1810 func (s *StateStore) JobByIDTxn(ws memdb.WatchSet, namespace, id string, txn Txn) (*structs.Job, error) { 1811 watchCh, existing, err := txn.FirstWatch("jobs", "id", namespace, id) 1812 if err != nil { 1813 return nil, fmt.Errorf("job lookup failed: %v", err) 1814 } 1815 ws.Add(watchCh) 1816 1817 if existing != nil { 1818 return existing.(*structs.Job), nil 1819 } 1820 return nil, nil 1821 } 1822 1823 // JobsByIDPrefix is used to lookup a job by prefix 1824 func (s *StateStore) JobsByIDPrefix(ws memdb.WatchSet, namespace, id string) (memdb.ResultIterator, error) { 1825 txn := s.db.ReadTxn() 1826 1827 iter, err := txn.Get("jobs", "id_prefix", namespace, id) 1828 if err != nil { 1829 return nil, fmt.Errorf("job lookup failed: %v", err) 1830 } 1831 1832 ws.Add(iter.WatchCh()) 1833 1834 return iter, nil 1835 } 1836 1837 // JobVersionsByID returns all the tracked versions of a job. 1838 func (s *StateStore) JobVersionsByID(ws memdb.WatchSet, namespace, id string) ([]*structs.Job, error) { 1839 txn := s.db.ReadTxn() 1840 1841 return s.jobVersionByID(txn, ws, namespace, id) 1842 } 1843 1844 // jobVersionByID is the underlying implementation for retrieving all tracked 1845 // versions of a job and is called under an existing transaction. A watch set 1846 // can optionally be passed in to add the job histories to the watch set. 1847 func (s *StateStore) jobVersionByID(txn *txn, ws memdb.WatchSet, namespace, id string) ([]*structs.Job, error) { 1848 // Get all the historic jobs for this ID 1849 iter, err := txn.Get("job_version", "id_prefix", namespace, id) 1850 if err != nil { 1851 return nil, err 1852 } 1853 1854 ws.Add(iter.WatchCh()) 1855 1856 var all []*structs.Job 1857 for { 1858 raw := iter.Next() 1859 if raw == nil { 1860 break 1861 } 1862 1863 // Ensure the ID is an exact match 1864 j := raw.(*structs.Job) 1865 if j.ID != id { 1866 continue 1867 } 1868 1869 all = append(all, j) 1870 } 1871 1872 // Sort in reverse order so that the highest version is first 1873 sort.Slice(all, func(i, j int) bool { 1874 return all[i].Version > all[j].Version 1875 }) 1876 1877 return all, nil 1878 } 1879 1880 // JobByIDAndVersion returns the job identified by its ID and Version. The 1881 // passed watchset may be nil. 1882 func (s *StateStore) JobByIDAndVersion(ws memdb.WatchSet, namespace, id string, version uint64) (*structs.Job, error) { 1883 txn := s.db.ReadTxn() 1884 return s.jobByIDAndVersionImpl(ws, namespace, id, version, txn) 1885 } 1886
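// Editor's example (an illustrative sketch, not part of the original file):
// combining NewWatchSet with JobByID to block until a job is written or
// changed. WatchCtx is go-memdb's WatchSet API; the function name
// watchJobExample is hypothetical.
func watchJobExample(ctx context.Context, s *StateStore, namespace, id string) (*structs.Job, error) {
	for {
		// NewWatchSet already watches the store's abandon channel, so a
		// snapshot restore also unblocks the caller.
		ws := s.NewWatchSet()
		job, err := s.JobByID(ws, namespace, id)
		if err != nil {
			return nil, err
		}
		if job != nil {
			return job, nil
		}
		// Block until one of the watched channels fires.
		if err := ws.WatchCtx(ctx); err != nil {
			return nil, err
		}
	}
}

1887 // jobByIDAndVersionImpl returns the job identified by its ID and Version. The 1888 // passed watchset may be nil.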
1889 func (s *StateStore) jobByIDAndVersionImpl(ws memdb.WatchSet, namespace, id string, 1890 version uint64, txn *txn) (*structs.Job, error) { 1891 1892 watchCh, existing, err := txn.FirstWatch("job_version", "id", namespace, id, version) 1893 if err != nil { 1894 return nil, err 1895 } 1896 1897 ws.Add(watchCh) 1898 1899 if existing != nil { 1900 job := existing.(*structs.Job) 1901 return job, nil 1902 } 1903 1904 return nil, nil 1905 } 1906 1907 func (s *StateStore) JobVersions(ws memdb.WatchSet) (memdb.ResultIterator, error) { 1908 txn := s.db.ReadTxn() 1909 1910 // Walk the entire job_version table 1911 iter, err := txn.Get("job_version", "id") 1912 if err != nil { 1913 return nil, err 1914 } 1915 1916 ws.Add(iter.WatchCh()) 1917 return iter, nil 1918 } 1919 1920 // Jobs returns an iterator over all the jobs 1921 func (s *StateStore) Jobs(ws memdb.WatchSet) (memdb.ResultIterator, error) { 1922 txn := s.db.ReadTxn() 1923 1924 // Walk the entire jobs table 1925 iter, err := txn.Get("jobs", "id") 1926 if err != nil { 1927 return nil, err 1928 } 1929 1930 ws.Add(iter.WatchCh()) 1931 1932 return iter, nil 1933 } 1934 1935 // JobsByNamespace returns an iterator over all the jobs for the given namespace 1936 func (s *StateStore) JobsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) { 1937 txn := s.db.ReadTxn() 1938 return s.jobsByNamespaceImpl(ws, namespace, txn) 1939 } 1940 1941 // jobsByNamespaceImpl returns an iterator over all the jobs for the given namespace 1942 func (s *StateStore) jobsByNamespaceImpl(ws memdb.WatchSet, namespace string, txn *txn) (memdb.ResultIterator, error) { 1943 // Walk the entire jobs table 1944 iter, err := txn.Get("jobs", "id_prefix", namespace, "") 1945 if err != nil { 1946 return nil, err 1947 } 1948 1949 ws.Add(iter.WatchCh()) 1950 1951 return iter, nil 1952 } 1953 1954 // JobsByPeriodic returns an iterator over all the periodic or non-periodic jobs. 1955 func (s *StateStore) JobsByPeriodic(ws memdb.WatchSet, periodic bool) (memdb.ResultIterator, error) { 1956 txn := s.db.ReadTxn() 1957 1958 iter, err := txn.Get("jobs", "periodic", periodic) 1959 if err != nil { 1960 return nil, err 1961 } 1962 1963 ws.Add(iter.WatchCh()) 1964 1965 return iter, nil 1966 } 1967 1968 // JobsByScheduler returns an iterator over all the jobs with the specific 1969 // scheduler type. 1970 func (s *StateStore) JobsByScheduler(ws memdb.WatchSet, schedulerType string) (memdb.ResultIterator, error) { 1971 txn := s.db.ReadTxn() 1972 1973 // Return an iterator for jobs with the specific type. 1974 iter, err := txn.Get("jobs", "type", schedulerType) 1975 if err != nil { 1976 return nil, err 1977 } 1978 1979 ws.Add(iter.WatchCh()) 1980 1981 return iter, nil 1982 } 1983 1984 // JobsByGC returns an iterator over all jobs eligible or ineligible for garbage 1985 // collection. 1986 func (s *StateStore) JobsByGC(ws memdb.WatchSet, gc bool) (memdb.ResultIterator, error) { 1987 txn := s.db.ReadTxn() 1988 1989 iter, err := txn.Get("jobs", "gc", gc) 1990 if err != nil { 1991 return nil, err 1992 } 1993 1994 ws.Add(iter.WatchCh()) 1995 1996 return iter, nil 1997 } 1998
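// Editor's example (sketch, not part of the original file): draining a
// memdb.ResultIterator returned by Jobs. This is the same loop the file
// itself uses for list lookups; listJobsExample is a hypothetical caller.
func listJobsExample(s *StateStore) ([]*structs.Job, error) {
	ws := memdb.NewWatchSet()
	iter, err := s.Jobs(ws)
	if err != nil {
		return nil, err
	}
	var out []*structs.Job
	for raw := iter.Next(); raw != nil; raw = iter.Next() {
		// Objects from the iterator are shared state: treat as read-only.
		out = append(out, raw.(*structs.Job))
	}
	return out, nil
}

1999 // JobSummaryByID returns a job summary object which matches a specific id.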
2000 func (s *StateStore) JobSummaryByID(ws memdb.WatchSet, namespace, jobID string) (*structs.JobSummary, error) { 2001 txn := s.db.ReadTxn() 2002 2003 watchCh, existing, err := txn.FirstWatch("job_summary", "id", namespace, jobID) 2004 if err != nil { 2005 return nil, err 2006 } 2007 2008 ws.Add(watchCh) 2009 2010 if existing != nil { 2011 summary := existing.(*structs.JobSummary) 2012 return summary, nil 2013 } 2014 2015 return nil, nil 2016 } 2017 2018 // JobSummaries walks the entire job summary table and returns all the job 2019 // summary objects 2020 func (s *StateStore) JobSummaries(ws memdb.WatchSet) (memdb.ResultIterator, error) { 2021 txn := s.db.ReadTxn() 2022 2023 iter, err := txn.Get("job_summary", "id") 2024 if err != nil { 2025 return nil, err 2026 } 2027 2028 ws.Add(iter.WatchCh()) 2029 2030 return iter, nil 2031 } 2032 2033 // JobSummaryByPrefix is used to look up Job Summary by id prefix 2034 func (s *StateStore) JobSummaryByPrefix(ws memdb.WatchSet, namespace, id string) (memdb.ResultIterator, error) { 2035 txn := s.db.ReadTxn() 2036 2037 iter, err := txn.Get("job_summary", "id_prefix", namespace, id) 2038 if err != nil { 2039 return nil, fmt.Errorf("job_summary lookup failed: %v", err) 2040 } 2041 2042 ws.Add(iter.WatchCh()) 2043 2044 return iter, nil 2045 } 2046 2047 // CSIVolumeRegister adds a volume to the server store, failing if it already exists 2048 func (s *StateStore) CSIVolumeRegister(index uint64, volumes []*structs.CSIVolume) error { 2049 txn := s.db.WriteTxn(index) 2050 defer txn.Abort() 2051 2052 for _, v := range volumes { 2053 if exists, err := s.namespaceExists(txn, v.Namespace); err != nil { 2054 return err 2055 } else if !exists { 2056 return fmt.Errorf("volume %s is in nonexistent namespace %s", v.ID, v.Namespace) 2057 } 2058 2059 // Check for volume existence 2060 obj, err := txn.First("csi_volumes", "id", v.Namespace, v.ID) 2061 if err != nil { 2062 return fmt.Errorf("volume existence check error: %v", err) 2063 } 2064 if obj != nil { 2065 // Allow some properties of a volume to be updated in place, but 2066 // prevent accidentally overwriting important properties, or 2067 // overwriting a volume in use. Note the parentheses: without them 2068 // the nil check on the type assertion would not guard the field 2069 // accesses below. 2070 old, ok := obj.(*structs.CSIVolume) 2071 if ok && (old.InUse() || 2072 old.ExternalID != v.ExternalID || 2073 old.PluginID != v.PluginID || 2074 old.Provider != v.Provider) { 2075 return fmt.Errorf("volume exists: %s", v.ID) 2076 } 2077 } 2078 if v.CreateIndex == 0 { 2079 v.CreateIndex = index 2080 v.ModifyIndex = index 2081 } 2082 2083 // Allocations are copy on write, so we want to keep the Allocation ID 2084 // but we need to clear the pointer so that we don't store it when we 2085 // write the volume to the state store. We'll get it from the db in 2086 // denormalize. 2087 for allocID := range v.ReadAllocs { 2088 v.ReadAllocs[allocID] = nil 2089 } 2090 for allocID := range v.WriteAllocs { 2091 v.WriteAllocs[allocID] = nil 2092 } 2093 2094 err = txn.Insert("csi_volumes", v) 2095 if err != nil { 2096 return fmt.Errorf("volume insert: %v", err) 2097 } 2098 } 2099 2100 if err := txn.Insert("index", &IndexEntry{"csi_volumes", index}); err != nil { 2101 return fmt.Errorf("index update failed: %v", err) 2102 } 2103 2104 return txn.Commit() 2105 } 2106
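// Editor's example (sketch, not part of the original file): the register
// path above is idempotent for an unused volume whose identity fields are
// unchanged, and rejects anything else as "volume exists". All field values
// here are hypothetical.
func registerVolumeExample(s *StateStore, index uint64) error {
	vol := &structs.CSIVolume{
		ID:        "vol-example",
		Namespace: structs.DefaultNamespace,
		PluginID:  "plugin-example",
		Provider:  "example-provider",
	}
	if err := s.CSIVolumeRegister(index, []*structs.CSIVolume{vol}); err != nil {
		return err
	}
	// Re-registering the same unused volume is an in-place update; changing
	// PluginID, Provider, or ExternalID (or re-registering a volume in use)
	// would fail.
	return s.CSIVolumeRegister(index+1, []*structs.CSIVolume{vol})
}

2107 // CSIVolumes returns the unfiltered list of all volumes. Caller should 2108 // snapshot if it wants to also denormalize the plugins.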
2109 func (s *StateStore) CSIVolumes(ws memdb.WatchSet) (memdb.ResultIterator, error) { 2110 txn := s.db.ReadTxn() 2111 defer txn.Abort() 2112 2113 iter, err := txn.Get("csi_volumes", "id") 2114 if err != nil { 2115 return nil, fmt.Errorf("csi_volumes lookup failed: %v", err) 2116 } 2117 2118 ws.Add(iter.WatchCh()) 2119 2120 return iter, nil 2121 } 2122 2123 // CSIVolumeByID is used to lookup a single volume. Returns a copy of the 2124 // volume because its plugins and allocations are denormalized to provide 2125 // accurate Health. 2126 func (s *StateStore) CSIVolumeByID(ws memdb.WatchSet, namespace, id string) (*structs.CSIVolume, error) { 2127 txn := s.db.ReadTxn() 2128 2129 watchCh, obj, err := txn.FirstWatch("csi_volumes", "id_prefix", namespace, id) 2130 if err != nil { 2131 return nil, fmt.Errorf("volume lookup failed: %s %v", id, err) 2132 } 2133 2134 ws.Add(watchCh) 2135 2136 if obj == nil { 2137 return nil, nil 2138 } 2139 2140 // we return the volume with the plugins denormalized by default, 2141 // because the scheduler needs them for feasibility checking 2142 vol := obj.(*structs.CSIVolume) 2143 return s.CSIVolumeDenormalizePluginsTxn(txn, vol.Copy()) 2144 } 2145 2146 // CSIVolumes looks up csi_volumes by pluginID. Caller should snapshot if it 2147 // wants to also denormalize the plugins. 2148 func (s *StateStore) CSIVolumesByPluginID(ws memdb.WatchSet, namespace, pluginID string) (memdb.ResultIterator, error) { 2149 txn := s.db.ReadTxn() 2150 2151 iter, err := txn.Get("csi_volumes", "plugin_id", pluginID) 2152 if err != nil { 2153 return nil, fmt.Errorf("volume lookup failed: %v", err) 2154 } 2155 2156 // Filter the iterator by namespace 2157 f := func(raw interface{}) bool { 2158 v, ok := raw.(*structs.CSIVolume) 2159 if !ok { 2160 return false 2161 } 2162 return v.Namespace != namespace 2163 } 2164 2165 wrap := memdb.NewFilterIterator(iter, f) 2166 return wrap, nil 2167 } 2168 2169 // CSIVolumesByIDPrefix supports search. Caller should snapshot if it wants to 2170 // also denormalize the plugins. 2171 func (s *StateStore) CSIVolumesByIDPrefix(ws memdb.WatchSet, namespace, volumeID string) (memdb.ResultIterator, error) { 2172 txn := s.db.ReadTxn() 2173 2174 iter, err := txn.Get("csi_volumes", "id_prefix", namespace, volumeID) 2175 if err != nil { 2176 return nil, err 2177 } 2178 2179 ws.Add(iter.WatchCh()) 2180 2181 return iter, nil 2182 } 2183 2184 // CSIVolumesByNodeID looks up CSIVolumes in use on a node. Caller should 2185 // snapshot if it wants to also denormalize the plugins. 
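// Editor's example (sketch, not part of the original file): consuming the
// filtered iterator from CSIVolumesByPluginID. memdb.NewFilterIterator drops
// every object for which the filter returns true, so only volumes in the
// requested namespace come out; volumesByPluginExample is hypothetical.
func volumesByPluginExample(s *StateStore, namespace, pluginID string) ([]*structs.CSIVolume, error) {
	ws := memdb.NewWatchSet()
	iter, err := s.CSIVolumesByPluginID(ws, namespace, pluginID)
	if err != nil {
		return nil, err
	}
	var out []*structs.CSIVolume
	for raw := iter.Next(); raw != nil; raw = iter.Next() {
		out = append(out, raw.(*structs.CSIVolume))
	}
	return out, nil
}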
2186 func (s *StateStore) CSIVolumesByNodeID(ws memdb.WatchSet, nodeID string) (memdb.ResultIterator, error) { 2187 allocs, err := s.AllocsByNode(ws, nodeID) 2188 if err != nil { 2189 return nil, fmt.Errorf("alloc lookup failed: %v", err) 2190 } 2191 2192 // Find volume ids for CSI volumes in running allocs, or allocs that we desire to run 2193 ids := map[string]string{} // Map volumeID to Namespace 2194 for _, a := range allocs { 2195 tg := a.Job.LookupTaskGroup(a.TaskGroup) 2196 2197 if !(a.DesiredStatus == structs.AllocDesiredStatusRun || 2198 a.ClientStatus == structs.AllocClientStatusRunning) || 2199 len(tg.Volumes) == 0 { 2200 continue 2201 } 2202 2203 for _, v := range tg.Volumes { 2204 if v.Type != structs.VolumeTypeCSI { 2205 continue 2206 } 2207 ids[v.Source] = a.Namespace 2208 } 2209 } 2210 2211 // Lookup the raw CSIVolumes to match the other list interfaces 2212 iter := NewSliceIterator() 2213 txn := s.db.ReadTxn() 2214 for id, namespace := range ids { 2215 raw, err := txn.First("csi_volumes", "id", namespace, id) 2216 if err != nil { 2217 return nil, fmt.Errorf("volume lookup failed: %s %v", id, err) 2218 } 2219 iter.Add(raw) 2220 } 2221 2222 ws.Add(iter.WatchCh()) 2223 2224 return iter, nil 2225 } 2226 2227 // CSIVolumesByNamespace looks up all the volumes in the given namespace 2228 func (s *StateStore) CSIVolumesByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) { 2229 txn := s.db.ReadTxn() 2230 2231 iter, err := txn.Get("csi_volumes", "id_prefix", namespace, "") 2232 if err != nil { 2233 return nil, fmt.Errorf("volume lookup failed: %v", err) 2234 } 2235 2236 ws.Add(iter.WatchCh()) 2237 2238 return iter, nil 2239 } 2240 2241 // CSIVolumeClaim updates the volume's claim count and allocation list 2242 func (s *StateStore) CSIVolumeClaim(index uint64, namespace, id string, claim *structs.CSIVolumeClaim) error { 2243 txn := s.db.WriteTxn(index) 2244 defer txn.Abort() 2245 2246 row, err := txn.First("csi_volumes", "id", namespace, id) 2247 if err != nil { 2248 return fmt.Errorf("volume lookup failed: %s: %v", id, err) 2249 } 2250 if row == nil { 2251 return fmt.Errorf("volume not found: %s", id) 2252 } 2253 2254 orig, ok := row.(*structs.CSIVolume) 2255 if !ok { 2256 return fmt.Errorf("volume row conversion error") 2257 } 2258 2259 var alloc *structs.Allocation 2260 if claim.State == structs.CSIVolumeClaimStateTaken { 2261 alloc, err = s.allocByIDImpl(txn, nil, claim.AllocationID) 2262 if err != nil { 2263 s.logger.Error("AllocByID failed", "error", err) 2264 return fmt.Errorf(structs.ErrUnknownAllocationPrefix) 2265 } 2266 if alloc == nil { 2267 s.logger.Error("AllocByID failed to find alloc", "alloc_id", claim.AllocationID) 2268 // err is nil on this path, so return the unknown-allocation error 2269 // unconditionally rather than guarding it behind a dead err check 2270 return fmt.Errorf(structs.ErrUnknownAllocationPrefix) 2271 } 2272 } 2273 2274 volume, err := s.CSIVolumeDenormalizePluginsTxn(txn, orig.Copy()) 2275 if err != nil { 2276 return err 2277 } 2278 volume, err = s.CSIVolumeDenormalizeTxn(txn, nil, volume) 2279 if err != nil { 2280 return err 2281 } 2282 2283 // in the case of a job deregistration, there will be no allocation ID 2284 // for the claim but we still want to write an updated index to the volume 2285 // so that volume reaping is triggered 2286 if claim.AllocationID != "" { 2287 err = volume.Claim(claim, alloc) 2288 if err != nil { 2289 return err 2290 } 2291 } 2292 2293 volume.ModifyIndex = index 2294 2295 // Allocations are copy on write, so we want to keep the Allocation ID 2296 // but we need to clear the pointer so that we don't store it when we 
2297 // write the volume to the state store. We'll get it from the db in 2298 // denormalize. 2299 for allocID := range volume.ReadAllocs { 2300 volume.ReadAllocs[allocID] = nil 2301 } 2302 for allocID := range volume.WriteAllocs { 2303 volume.WriteAllocs[allocID] = nil 2304 } 2305 2306 if err = txn.Insert("csi_volumes", volume); err != nil { 2307 return fmt.Errorf("volume update failed: %s: %v", id, err) 2308 } 2309 2310 if err = txn.Insert("index", &IndexEntry{"csi_volumes", index}); err != nil { 2311 return fmt.Errorf("index update failed: %v", err) 2312 } 2313 2314 return txn.Commit() 2315 } 2316 2317 // CSIVolumeDeregister removes the volume from the server 2318 func (s *StateStore) CSIVolumeDeregister(index uint64, namespace string, ids []string, force bool) error { 2319 txn := s.db.WriteTxn(index) 2320 defer txn.Abort() 2321 2322 for _, id := range ids { 2323 existing, err := txn.First("csi_volumes", "id_prefix", namespace, id) 2324 if err != nil { 2325 return fmt.Errorf("volume lookup failed: %s: %v", id, err) 2326 } 2327 2328 if existing == nil { 2329 return fmt.Errorf("volume not found: %s", id) 2330 } 2331 2332 vol, ok := existing.(*structs.CSIVolume) 2333 if !ok { 2334 return fmt.Errorf("volume row conversion error: %s", id) 2335 } 2336 2337 // The common case for a volume deregister is when the volume is 2338 // unused, but we can also let an operator intervene in the case where 2339 // allocations have been stopped but claims can't be freed because 2340 // ex. the plugins have all been removed. 2341 if vol.InUse() { 2342 if !force || !s.volSafeToForce(txn, vol) { 2343 return fmt.Errorf("volume in use: %s", id) 2344 } 2345 } 2346 2347 if err = txn.Delete("csi_volumes", existing); err != nil { 2348 return fmt.Errorf("volume delete failed: %s: %v", id, err) 2349 } 2350 } 2351 2352 if err := txn.Insert("index", &IndexEntry{"csi_volumes", index}); err != nil { 2353 return fmt.Errorf("index update failed: %v", err) 2354 } 2355 2356 return txn.Commit() 2357 } 2358 2359 // volSafeToForce checks if the any of the remaining allocations 2360 // are in a non-terminal state. 2361 func (s *StateStore) volSafeToForce(txn Txn, v *structs.CSIVolume) bool { 2362 vol, err := s.CSIVolumeDenormalizeTxn(txn, nil, v) 2363 if err != nil { 2364 return false 2365 } 2366 2367 for _, alloc := range vol.ReadAllocs { 2368 if alloc != nil && !alloc.TerminalStatus() { 2369 return false 2370 } 2371 } 2372 for _, alloc := range vol.WriteAllocs { 2373 if alloc != nil && !alloc.TerminalStatus() { 2374 return false 2375 } 2376 } 2377 return true 2378 } 2379 2380 // CSIVolumeDenormalizePlugins returns a CSIVolume with current health and 2381 // plugins, but without allocations. 2382 // Use this for current volume metadata, handling lists of volumes. 2383 // Use CSIVolumeDenormalize for volumes containing both health and current 2384 // allocations. 2385 func (s *StateStore) CSIVolumeDenormalizePlugins(ws memdb.WatchSet, vol *structs.CSIVolume) (*structs.CSIVolume, error) { 2386 if vol == nil { 2387 return nil, nil 2388 } 2389 txn := s.db.ReadTxn() 2390 defer txn.Abort() 2391 return s.CSIVolumeDenormalizePluginsTxn(txn, vol) 2392 } 2393 2394 // CSIVolumeDenormalizePluginsTxn returns a CSIVolume with current health and 2395 // plugins, but without allocations. 2396 // Use this for current volume metadata, handling lists of volumes. 2397 // Use CSIVolumeDenormalize for volumes containing both health and current 2398 // allocations. 
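// Editor's example (sketch, not part of the original file): the two-step
// denormalize flow for a single volume. CSIVolumeByID already denormalizes
// plugin health on a copy; CSIVolumeDenormalize then attaches allocations.
// volumeWithAllocsExample is a hypothetical caller.
func volumeWithAllocsExample(s *StateStore, namespace, id string) (*structs.CSIVolume, error) {
	ws := memdb.NewWatchSet()
	vol, err := s.CSIVolumeByID(ws, namespace, id)
	if err != nil || vol == nil {
		return nil, err
	}
	return s.CSIVolumeDenormalize(ws, vol)
}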
2399 func (s *StateStore) CSIVolumeDenormalizePluginsTxn(txn Txn, vol *structs.CSIVolume) (*structs.CSIVolume, error) { 2400 if vol == nil { 2401 return nil, nil 2402 } 2403 plug, err := s.CSIPluginByIDTxn(txn, nil, vol.PluginID) 2404 if err != nil { 2405 return nil, fmt.Errorf("plugin lookup error: %s %v", vol.PluginID, err) 2406 } 2407 if plug == nil { 2408 vol.ControllersHealthy = 0 2409 vol.NodesHealthy = 0 2410 vol.Schedulable = false 2411 return vol, nil 2412 } 2413 2414 vol.Provider = plug.Provider 2415 vol.ProviderVersion = plug.Version 2416 vol.ControllerRequired = plug.ControllerRequired 2417 vol.ControllersHealthy = plug.ControllersHealthy 2418 vol.NodesHealthy = plug.NodesHealthy 2419 2420 // This value may be stale, but stale is ok 2421 vol.ControllersExpected = plug.ControllersExpected 2422 vol.NodesExpected = plug.NodesExpected 2423 2424 vol.Schedulable = vol.NodesHealthy > 0 2425 if vol.ControllerRequired { 2426 vol.Schedulable = vol.ControllersHealthy > 0 && vol.Schedulable 2427 } 2428 2429 return vol, nil 2430 } 2431 2432 // CSIVolumeDenormalize returns a CSIVolume with allocations 2433 func (s *StateStore) CSIVolumeDenormalize(ws memdb.WatchSet, vol *structs.CSIVolume) (*structs.CSIVolume, error) { 2434 txn := s.db.ReadTxn() 2435 return s.CSIVolumeDenormalizeTxn(txn, ws, vol) 2436 } 2437 2438 // CSIVolumeDenormalizeTxn populates a CSIVolume with allocations 2439 func (s *StateStore) CSIVolumeDenormalizeTxn(txn Txn, ws memdb.WatchSet, vol *structs.CSIVolume) (*structs.CSIVolume, error) { 2440 if vol == nil { 2441 return nil, nil 2442 } 2443 for id := range vol.ReadAllocs { 2444 a, err := s.allocByIDImpl(txn, ws, id) 2445 if err != nil { 2446 return nil, err 2447 } 2448 if a != nil { 2449 vol.ReadAllocs[id] = a 2450 // COMPAT(1.0): the CSIVolumeClaim fields were added 2451 // after 0.11.1, so claims made before that may be 2452 // missing this value. (same for WriteAlloc below) 2453 if _, ok := vol.ReadClaims[id]; !ok { 2454 vol.ReadClaims[id] = &structs.CSIVolumeClaim{ 2455 AllocationID: a.ID, 2456 NodeID: a.NodeID, 2457 Mode: structs.CSIVolumeClaimRead, 2458 State: structs.CSIVolumeClaimStateTaken, 2459 } 2460 } 2461 } 2462 } 2463 2464 for id := range vol.WriteAllocs { 2465 a, err := s.allocByIDImpl(txn, ws, id) 2466 if err != nil { 2467 return nil, err 2468 } 2469 if a != nil { 2470 vol.WriteAllocs[id] = a 2471 if _, ok := vol.WriteClaims[id]; !ok { 2472 vol.WriteClaims[id] = &structs.CSIVolumeClaim{ 2473 AllocationID: a.ID, 2474 NodeID: a.NodeID, 2475 Mode: structs.CSIVolumeClaimWrite, 2476 State: structs.CSIVolumeClaimStateTaken, 2477 } 2478 } 2479 } 2480 } 2481 2482 return vol, nil 2483 } 2484 2485 // CSIPlugins returns the unfiltered list of all plugin health status 2486 func (s *StateStore) CSIPlugins(ws memdb.WatchSet) (memdb.ResultIterator, error) { 2487 txn := s.db.ReadTxn() 2488 defer txn.Abort() 2489 2490 iter, err := txn.Get("csi_plugins", "id") 2491 if err != nil { 2492 return nil, fmt.Errorf("csi_plugins lookup failed: %v", err) 2493 } 2494 2495 ws.Add(iter.WatchCh()) 2496 2497 return iter, nil 2498 } 2499 2500 // CSIPluginsByIDPrefix supports search 2501 func (s *StateStore) CSIPluginsByIDPrefix(ws memdb.WatchSet, pluginID string) (memdb.ResultIterator, error) { 2502 txn := s.db.ReadTxn() 2503 2504 iter, err := txn.Get("csi_plugins", "id_prefix", pluginID) 2505 if err != nil { 2506 return nil, err 2507 } 2508 2509 ws.Add(iter.WatchCh()) 2510 2511 return iter, nil 2512 } 2513 2514 // CSIPluginByID returns a named CSIPlugin. 
This method creates a new 2515 // transaction so you should not call it from within another transaction. 2516 func (s *StateStore) CSIPluginByID(ws memdb.WatchSet, id string) (*structs.CSIPlugin, error) { 2517 txn := s.db.ReadTxn() 2518 plugin, err := s.CSIPluginByIDTxn(txn, ws, id) 2519 if err != nil { 2520 return nil, err 2521 } 2522 return plugin, nil 2523 } 2524 2525 // CSIPluginByIDTxn returns a named CSIPlugin 2526 func (s *StateStore) CSIPluginByIDTxn(txn Txn, ws memdb.WatchSet, id string) (*structs.CSIPlugin, error) { 2527 2528 watchCh, obj, err := txn.FirstWatch("csi_plugins", "id_prefix", id) 2529 if err != nil { 2530 return nil, fmt.Errorf("csi_plugin lookup failed: %s %v", id, err) 2531 } 2532 2533 ws.Add(watchCh) 2534 2535 if obj != nil { 2536 return obj.(*structs.CSIPlugin), nil 2537 } 2538 return nil, nil 2539 } 2540 2541 // CSIPluginDenormalize returns a CSIPlugin with allocation details. Always called on a copy of the plugin. 2542 func (s *StateStore) CSIPluginDenormalize(ws memdb.WatchSet, plug *structs.CSIPlugin) (*structs.CSIPlugin, error) { 2543 txn := s.db.ReadTxn() 2544 return s.CSIPluginDenormalizeTxn(txn, ws, plug) 2545 } 2546 2547 func (s *StateStore) CSIPluginDenormalizeTxn(txn Txn, ws memdb.WatchSet, plug *structs.CSIPlugin) (*structs.CSIPlugin, error) { 2548 if plug == nil { 2549 return nil, nil 2550 } 2551 2552 // Get the unique list of allocation ids 2553 ids := map[string]struct{}{} 2554 for _, info := range plug.Controllers { 2555 ids[info.AllocID] = struct{}{} 2556 } 2557 for _, info := range plug.Nodes { 2558 ids[info.AllocID] = struct{}{} 2559 } 2560 2561 for id := range ids { 2562 alloc, err := s.allocByIDImpl(txn, ws, id) 2563 if err != nil { 2564 return nil, err 2565 } 2566 if alloc == nil { 2567 continue 2568 } 2569 plug.Allocations = append(plug.Allocations, alloc.Stub(nil)) 2570 } 2571 2572 return plug, nil 2573 } 2574 2575 // UpsertCSIPlugin writes the plugin to the state store. Note: there 2576 // is currently no raft message for this, as it's intended to support 2577 // testing use cases. 2578 func (s *StateStore) UpsertCSIPlugin(index uint64, plug *structs.CSIPlugin) error { 2579 txn := s.db.WriteTxn(index) 2580 defer txn.Abort() 2581 2582 existing, err := txn.First("csi_plugins", "id", plug.ID) 2583 if err != nil { 2584 return fmt.Errorf("csi_plugin lookup error: %s %v", plug.ID, err) 2585 } 2586 2587 plug.ModifyIndex = index 2588 if existing != nil { 2589 plug.CreateIndex = existing.(*structs.CSIPlugin).CreateIndex 2590 } 2591 2592 err = txn.Insert("csi_plugins", plug) 2593 if err != nil { 2594 return fmt.Errorf("csi_plugins insert error: %v", err) 2595 } 2596 if err := txn.Insert("index", &IndexEntry{"csi_plugins", index}); err != nil { 2597 return fmt.Errorf("index update failed: %v", err) 2598 } 2599 return txn.Commit() 2600 } 2601 2602 // DeleteCSIPlugin deletes the plugin if it's not in use. 
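// Editor's example (sketch, not part of the original file): plugin create
// and delete, as a test might drive it. It assumes the structs.NewCSIPlugin
// constructor; the plugin ID is hypothetical.
func pluginLifecycleExample(s *StateStore, index uint64) error {
	plug := structs.NewCSIPlugin("plugin-example", index)
	if err := s.UpsertCSIPlugin(index, plug); err != nil {
		return err
	}
	// DeleteCSIPlugin only succeeds while the denormalized plugin reports
	// no controllers, nodes, or allocations.
	return s.DeleteCSIPlugin(index+1, "plugin-example")
}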
2603 func (s *StateStore) DeleteCSIPlugin(index uint64, id string) error { 2604 txn := s.db.WriteTxn(index) 2605 defer txn.Abort() 2606 2607 plug, err := s.CSIPluginByIDTxn(txn, nil, id) 2608 if err != nil { 2609 return err 2610 } 2611 2612 if plug == nil { 2613 return nil 2614 } 2615 2616 plug, err = s.CSIPluginDenormalizeTxn(txn, nil, plug.Copy()) 2617 if err != nil { 2618 return err 2619 } 2620 if !plug.IsEmpty() { 2621 return fmt.Errorf("plugin in use") 2622 } 2623 2624 err = txn.Delete("csi_plugins", plug) 2625 if err != nil { 2626 return fmt.Errorf("csi_plugins delete error: %v", err) 2627 } 2628 return txn.Commit() 2629 } 2630 2631 // UpsertPeriodicLaunch is used to register a launch or update it. 2632 func (s *StateStore) UpsertPeriodicLaunch(index uint64, launch *structs.PeriodicLaunch) error { 2633 txn := s.db.WriteTxn(index) 2634 defer txn.Abort() 2635 2636 // Check if the job already exists 2637 existing, err := txn.First("periodic_launch", "id", launch.Namespace, launch.ID) 2638 if err != nil { 2639 return fmt.Errorf("periodic launch lookup failed: %v", err) 2640 } 2641 2642 // Setup the indexes correctly 2643 if existing != nil { 2644 launch.CreateIndex = existing.(*structs.PeriodicLaunch).CreateIndex 2645 launch.ModifyIndex = index 2646 } else { 2647 launch.CreateIndex = index 2648 launch.ModifyIndex = index 2649 } 2650 2651 // Insert the job 2652 if err := txn.Insert("periodic_launch", launch); err != nil { 2653 return fmt.Errorf("launch insert failed: %v", err) 2654 } 2655 if err := txn.Insert("index", &IndexEntry{"periodic_launch", index}); err != nil { 2656 return fmt.Errorf("index update failed: %v", err) 2657 } 2658 2659 return txn.Commit() 2660 } 2661 2662 // DeletePeriodicLaunch is used to delete the periodic launch 2663 func (s *StateStore) DeletePeriodicLaunch(index uint64, namespace, jobID string) error { 2664 txn := s.db.WriteTxn(index) 2665 defer txn.Abort() 2666 2667 err := s.DeletePeriodicLaunchTxn(index, namespace, jobID, txn) 2668 if err == nil { 2669 return txn.Commit() 2670 } 2671 return err 2672 } 2673 2674 // DeletePeriodicLaunchTxn is used to delete the periodic launch, like DeletePeriodicLaunch 2675 // but in a transaction. Useful for when making multiple modifications atomically 2676 func (s *StateStore) DeletePeriodicLaunchTxn(index uint64, namespace, jobID string, txn Txn) error { 2677 // Lookup the launch 2678 existing, err := txn.First("periodic_launch", "id", namespace, jobID) 2679 if err != nil { 2680 return fmt.Errorf("launch lookup failed: %v", err) 2681 } 2682 if existing == nil { 2683 return fmt.Errorf("launch not found") 2684 } 2685 2686 // Delete the launch 2687 if err := txn.Delete("periodic_launch", existing); err != nil { 2688 return fmt.Errorf("launch delete failed: %v", err) 2689 } 2690 if err := txn.Insert("index", &IndexEntry{"periodic_launch", index}); err != nil { 2691 return fmt.Errorf("index update failed: %v", err) 2692 } 2693 2694 return nil 2695 } 2696 2697 // PeriodicLaunchByID is used to lookup a periodic launch by the periodic job 2698 // ID. 
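// Editor's example (sketch, not part of the original file): index handling
// in UpsertPeriodicLaunch. On first insert CreateIndex == ModifyIndex ==
// index; a later upsert preserves CreateIndex and only advances ModifyIndex.
// The job ID is hypothetical.
func upsertLaunchExample(s *StateStore, index uint64, now time.Time) error {
	launch := &structs.PeriodicLaunch{
		ID:        "example-periodic-job",
		Namespace: structs.DefaultNamespace,
		Launch:    now,
	}
	return s.UpsertPeriodicLaunch(index, launch)
}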
2699 func (s *StateStore) PeriodicLaunchByID(ws memdb.WatchSet, namespace, id string) (*structs.PeriodicLaunch, error) { 2700 txn := s.db.ReadTxn() 2701 2702 watchCh, existing, err := txn.FirstWatch("periodic_launch", "id", namespace, id) 2703 if err != nil { 2704 return nil, fmt.Errorf("periodic launch lookup failed: %v", err) 2705 } 2706 2707 ws.Add(watchCh) 2708 2709 if existing != nil { 2710 return existing.(*structs.PeriodicLaunch), nil 2711 } 2712 return nil, nil 2713 } 2714 2715 // PeriodicLaunches returns an iterator over all the periodic launches 2716 func (s *StateStore) PeriodicLaunches(ws memdb.WatchSet) (memdb.ResultIterator, error) { 2717 txn := s.db.ReadTxn() 2718 2719 // Walk the entire table 2720 iter, err := txn.Get("periodic_launch", "id") 2721 if err != nil { 2722 return nil, err 2723 } 2724 2725 ws.Add(iter.WatchCh()) 2726 2727 return iter, nil 2728 } 2729 2730 // UpsertEvals is used to upsert a set of evaluations 2731 func (s *StateStore) UpsertEvals(msgType structs.MessageType, index uint64, evals []*structs.Evaluation) error { 2732 txn := s.db.WriteTxnMsgT(msgType, index) 2733 defer txn.Abort() 2734 2735 err := s.UpsertEvalsTxn(index, evals, txn) 2736 if err == nil { 2737 return txn.Commit() 2738 } 2739 return err 2740 } 2741 2742 // UpsertEvals is used to upsert a set of evaluations, like UpsertEvals 2743 // but in a transaction. Useful for when making multiple modifications atomically 2744 func (s *StateStore) UpsertEvalsTxn(index uint64, evals []*structs.Evaluation, txn Txn) error { 2745 // Do a nested upsert 2746 jobs := make(map[structs.NamespacedID]string, len(evals)) 2747 for _, eval := range evals { 2748 if err := s.nestedUpsertEval(txn, index, eval); err != nil { 2749 return err 2750 } 2751 2752 tuple := structs.NamespacedID{ 2753 ID: eval.JobID, 2754 Namespace: eval.Namespace, 2755 } 2756 jobs[tuple] = "" 2757 } 2758 2759 // Set the job's status 2760 if err := s.setJobStatuses(index, txn, jobs, false); err != nil { 2761 return fmt.Errorf("setting job status failed: %v", err) 2762 } 2763 2764 return nil 2765 } 2766 2767 // nestedUpsertEvaluation is used to nest an evaluation upsert within a transaction 2768 func (s *StateStore) nestedUpsertEval(txn *txn, index uint64, eval *structs.Evaluation) error { 2769 // Lookup the evaluation 2770 existing, err := txn.First("evals", "id", eval.ID) 2771 if err != nil { 2772 return fmt.Errorf("eval lookup failed: %v", err) 2773 } 2774 2775 // Update the indexes 2776 if existing != nil { 2777 eval.CreateIndex = existing.(*structs.Evaluation).CreateIndex 2778 eval.ModifyIndex = index 2779 } else { 2780 eval.CreateIndex = index 2781 eval.ModifyIndex = index 2782 } 2783 2784 // Update the job summary 2785 summaryRaw, err := txn.First("job_summary", "id", eval.Namespace, eval.JobID) 2786 if err != nil { 2787 return fmt.Errorf("job summary lookup failed: %v", err) 2788 } 2789 if summaryRaw != nil { 2790 js := summaryRaw.(*structs.JobSummary).Copy() 2791 hasSummaryChanged := false 2792 for tg, num := range eval.QueuedAllocations { 2793 if summary, ok := js.Summary[tg]; ok { 2794 if summary.Queued != num { 2795 summary.Queued = num 2796 js.Summary[tg] = summary 2797 hasSummaryChanged = true 2798 } 2799 } else { 2800 s.logger.Error("unable to update queued for job and task group", "job_id", eval.JobID, "task_group", tg, "namespace", eval.Namespace) 2801 } 2802 } 2803 2804 // Insert the job summary 2805 if hasSummaryChanged { 2806 js.ModifyIndex = index 2807 if err := txn.Insert("job_summary", js); err != nil { 2808 return 
fmt.Errorf("job summary insert failed: %v", err) 2809 } 2810 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 2811 return fmt.Errorf("index update failed: %v", err) 2812 } 2813 } 2814 } 2815 2816 // Check if the job has any blocked evaluations and cancel them 2817 if eval.Status == structs.EvalStatusComplete && len(eval.FailedTGAllocs) == 0 { 2818 // Get the blocked evaluation for a job if it exists 2819 iter, err := txn.Get("evals", "job", eval.Namespace, eval.JobID, structs.EvalStatusBlocked) 2820 if err != nil { 2821 return fmt.Errorf("failed to get blocked evals for job %q in namespace %q: %v", eval.JobID, eval.Namespace, err) 2822 } 2823 2824 var blocked []*structs.Evaluation 2825 for { 2826 raw := iter.Next() 2827 if raw == nil { 2828 break 2829 } 2830 blocked = append(blocked, raw.(*structs.Evaluation)) 2831 } 2832 2833 // Go through and update the evals 2834 for _, eval := range blocked { 2835 newEval := eval.Copy() 2836 newEval.Status = structs.EvalStatusCancelled 2837 newEval.StatusDescription = fmt.Sprintf("evaluation %q successful", newEval.ID) 2838 newEval.ModifyIndex = index 2839 2840 if err := txn.Insert("evals", newEval); err != nil { 2841 return fmt.Errorf("eval insert failed: %v", err) 2842 } 2843 } 2844 } 2845 2846 // Insert the eval 2847 if err := txn.Insert("evals", eval); err != nil { 2848 return fmt.Errorf("eval insert failed: %v", err) 2849 } 2850 if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil { 2851 return fmt.Errorf("index update failed: %v", err) 2852 } 2853 return nil 2854 } 2855 2856 // updateEvalModifyIndex is used to update the modify index of an evaluation that has been 2857 // through a scheduler pass. This is done as part of plan apply. It ensures that when a subsequent 2858 // scheduler workers process a re-queued evaluation it sees any partial updates from the plan apply. 
2859 func (s *StateStore) updateEvalModifyIndex(txn *txn, index uint64, evalID string) error { 2860 // Lookup the evaluation 2861 existing, err := txn.First("evals", "id", evalID) 2862 if err != nil { 2863 return fmt.Errorf("eval lookup failed: %v", err) 2864 } 2865 if existing == nil { 2866 s.logger.Error("unable to find eval", "eval_id", evalID) 2867 return fmt.Errorf("unable to find eval id %q", evalID) 2868 } 2869 eval := existing.(*structs.Evaluation).Copy() 2870 // Update the indexes 2871 eval.ModifyIndex = index 2872 2873 // Insert the eval 2874 if err := txn.Insert("evals", eval); err != nil { 2875 return fmt.Errorf("eval insert failed: %v", err) 2876 } 2877 if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil { 2878 return fmt.Errorf("index update failed: %v", err) 2879 } 2880 return nil 2881 } 2882 2883 // DeleteEval is used to delete an evaluation 2884 func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) error { 2885 txn := s.db.WriteTxn(index) 2886 defer txn.Abort() 2887 2888 jobs := make(map[structs.NamespacedID]string, len(evals)) 2889 for _, eval := range evals { 2890 existing, err := txn.First("evals", "id", eval) 2891 if err != nil { 2892 return fmt.Errorf("eval lookup failed: %v", err) 2893 } 2894 if existing == nil { 2895 continue 2896 } 2897 if err := txn.Delete("evals", existing); err != nil { 2898 return fmt.Errorf("eval delete failed: %v", err) 2899 } 2900 eval := existing.(*structs.Evaluation) 2901 2902 tuple := structs.NamespacedID{ 2903 ID: eval.JobID, 2904 Namespace: eval.Namespace, 2905 } 2906 jobs[tuple] = "" 2907 } 2908 2909 for _, alloc := range allocs { 2910 raw, err := txn.First("allocs", "id", alloc) 2911 if err != nil { 2912 return fmt.Errorf("alloc lookup failed: %v", err) 2913 } 2914 if raw == nil { 2915 continue 2916 } 2917 if err := txn.Delete("allocs", raw); err != nil { 2918 return fmt.Errorf("alloc delete failed: %v", err) 2919 } 2920 } 2921 2922 // Update the indexes 2923 if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil { 2924 return fmt.Errorf("index update failed: %v", err) 2925 } 2926 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 2927 return fmt.Errorf("index update failed: %v", err) 2928 } 2929 2930 // Set the job's status 2931 if err := s.setJobStatuses(index, txn, jobs, true); err != nil { 2932 return fmt.Errorf("setting job status failed: %v", err) 2933 } 2934 2935 return txn.Commit() 2936 } 2937 2938 // EvalByID is used to lookup an eval by its ID 2939 func (s *StateStore) EvalByID(ws memdb.WatchSet, id string) (*structs.Evaluation, error) { 2940 txn := s.db.ReadTxn() 2941 2942 watchCh, existing, err := txn.FirstWatch("evals", "id", id) 2943 if err != nil { 2944 return nil, fmt.Errorf("eval lookup failed: %v", err) 2945 } 2946 2947 ws.Add(watchCh) 2948 2949 if existing != nil { 2950 return existing.(*structs.Evaluation), nil 2951 } 2952 return nil, nil 2953 } 2954 2955 // EvalsByIDPrefix is used to lookup evaluations by prefix in a particular 2956 // namespace 2957 func (s *StateStore) EvalsByIDPrefix(ws memdb.WatchSet, namespace, id string) (memdb.ResultIterator, error) { 2958 txn := s.db.ReadTxn() 2959 2960 // Get an iterator over all evals by the id prefix 2961 iter, err := txn.Get("evals", "id_prefix", id) 2962 if err != nil { 2963 return nil, fmt.Errorf("eval lookup failed: %v", err) 2964 } 2965 2966 ws.Add(iter.WatchCh()) 2967 2968 // Wrap the iterator in a filter 2969 wrap := memdb.NewFilterIterator(iter, evalNamespaceFilter(namespace)) 
2970 return wrap, nil 2971 } 2972 2973 // evalNamespaceFilter returns a filter function that filters all evaluations 2974 // not in the given namespace. 2975 func evalNamespaceFilter(namespace string) func(interface{}) bool { 2976 return func(raw interface{}) bool { 2977 eval, ok := raw.(*structs.Evaluation) 2978 if !ok { 2979 return true 2980 } 2981 2982 return eval.Namespace != namespace 2983 } 2984 } 2985 2986 // EvalsByJob returns all the evaluations by job id 2987 func (s *StateStore) EvalsByJob(ws memdb.WatchSet, namespace, jobID string) ([]*structs.Evaluation, error) { 2988 txn := s.db.ReadTxn() 2989 2990 // Get an iterator over the node allocations 2991 iter, err := txn.Get("evals", "job_prefix", namespace, jobID) 2992 if err != nil { 2993 return nil, err 2994 } 2995 2996 ws.Add(iter.WatchCh()) 2997 2998 var out []*structs.Evaluation 2999 for { 3000 raw := iter.Next() 3001 if raw == nil { 3002 break 3003 } 3004 3005 e := raw.(*structs.Evaluation) 3006 3007 // Filter non-exact matches 3008 if e.JobID != jobID { 3009 continue 3010 } 3011 3012 out = append(out, e) 3013 } 3014 return out, nil 3015 } 3016 3017 // Evals returns an iterator over all the evaluations 3018 func (s *StateStore) Evals(ws memdb.WatchSet) (memdb.ResultIterator, error) { 3019 txn := s.db.ReadTxn() 3020 3021 // Walk the entire table 3022 iter, err := txn.Get("evals", "id") 3023 if err != nil { 3024 return nil, err 3025 } 3026 3027 ws.Add(iter.WatchCh()) 3028 3029 return iter, nil 3030 } 3031 3032 // EvalsByNamespace returns an iterator over all the evaluations in the given 3033 // namespace 3034 func (s *StateStore) EvalsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) { 3035 txn := s.db.ReadTxn() 3036 3037 // Walk the entire table 3038 iter, err := txn.Get("evals", "namespace", namespace) 3039 if err != nil { 3040 return nil, err 3041 } 3042 3043 ws.Add(iter.WatchCh()) 3044 3045 return iter, nil 3046 } 3047 3048 // UpdateAllocsFromClient is used to update an allocation based on input 3049 // from a client. While the schedulers are the authority on the allocation for 3050 // most things, some updates are authoritative from the client. Specifically, 3051 // the desired state comes from the schedulers, while the actual state comes 3052 // from clients. 
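// Editor's example (sketch, not part of the original file): prefix search
// plus the namespace filter defined above. The filter drops evals from other
// namespaces, since the id_prefix index itself is not namespaced.
func evalsByPrefixExample(s *StateStore, namespace, prefix string) ([]*structs.Evaluation, error) {
	ws := memdb.NewWatchSet()
	iter, err := s.EvalsByIDPrefix(ws, namespace, prefix)
	if err != nil {
		return nil, err
	}
	var out []*structs.Evaluation
	for raw := iter.Next(); raw != nil; raw = iter.Next() {
		out = append(out, raw.(*structs.Evaluation))
	}
	return out, nil
}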
3053 func (s *StateStore) UpdateAllocsFromClient(msgType structs.MessageType, index uint64, allocs []*structs.Allocation) error { 3054 txn := s.db.WriteTxnMsgT(msgType, index) 3055 defer txn.Abort() 3056 3057 // Handle each of the updated allocations 3058 for _, alloc := range allocs { 3059 if err := s.nestedUpdateAllocFromClient(txn, index, alloc); err != nil { 3060 return err 3061 } 3062 } 3063 3064 // Update the indexes 3065 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 3066 return fmt.Errorf("index update failed: %v", err) 3067 } 3068 3069 return txn.Commit() 3070 } 3071 3072 // nestedUpdateAllocFromClient is used to nest an update of an allocation with client status 3073 func (s *StateStore) nestedUpdateAllocFromClient(txn *txn, index uint64, alloc *structs.Allocation) error { 3074 // Look for existing alloc 3075 existing, err := txn.First("allocs", "id", alloc.ID) 3076 if err != nil { 3077 return fmt.Errorf("alloc lookup failed: %v", err) 3078 } 3079 3080 // Nothing to do if this does not exist 3081 if existing == nil { 3082 return nil 3083 } 3084 exist := existing.(*structs.Allocation) 3085 3086 // Copy everything from the existing allocation 3087 copyAlloc := exist.Copy() 3088 3089 // Pull in anything the client is the authority on 3090 copyAlloc.ClientStatus = alloc.ClientStatus 3091 copyAlloc.ClientDescription = alloc.ClientDescription 3092 copyAlloc.TaskStates = alloc.TaskStates 3093 copyAlloc.NetworkStatus = alloc.NetworkStatus 3094 3095 // The client can only set its deployment health and timestamp, so just take 3096 // those 3097 if copyAlloc.DeploymentStatus != nil && alloc.DeploymentStatus != nil { 3098 oldHasHealthy := copyAlloc.DeploymentStatus.HasHealth() 3099 newHasHealthy := alloc.DeploymentStatus.HasHealth() 3100 3101 // We got new health information from the client 3102 if newHasHealthy && (!oldHasHealthy || *copyAlloc.DeploymentStatus.Healthy != *alloc.DeploymentStatus.Healthy) { 3103 // Updated deployment health and timestamp 3104 copyAlloc.DeploymentStatus.Healthy = helper.BoolToPtr(*alloc.DeploymentStatus.Healthy) 3105 copyAlloc.DeploymentStatus.Timestamp = alloc.DeploymentStatus.Timestamp 3106 copyAlloc.DeploymentStatus.ModifyIndex = index 3107 } 3108 } else if alloc.DeploymentStatus != nil { 3109 // First time getting a deployment status so copy everything and just 3110 // set the index 3111 copyAlloc.DeploymentStatus = alloc.DeploymentStatus.Copy() 3112 copyAlloc.DeploymentStatus.ModifyIndex = index 3113 } 3114 3115 // Update the modify index 3116 copyAlloc.ModifyIndex = index 3117 3118 // Update the modify time 3119 copyAlloc.ModifyTime = alloc.ModifyTime 3120 3121 if err := s.updateDeploymentWithAlloc(index, copyAlloc, exist, txn); err != nil { 3122 return fmt.Errorf("error updating deployment: %v", err) 3123 } 3124 3125 if err := s.updateSummaryWithAlloc(index, copyAlloc, exist, txn); err != nil { 3126 return fmt.Errorf("error updating job summary: %v", err) 3127 } 3128 3129 if err := s.updateEntWithAlloc(index, copyAlloc, exist, txn); err != nil { 3130 return err 3131 } 3132 3133 if err := s.updatePluginWithAlloc(index, copyAlloc, txn); err != nil { 3134 return err 3135 } 3136 3137 // Update the allocation 3138 if err := txn.Insert("allocs", copyAlloc); err != nil { 3139 return fmt.Errorf("alloc insert failed: %v", err) 3140 } 3141 3142 // Set the job's status 3143 forceStatus := "" 3144 if !copyAlloc.TerminalStatus() { 3145 forceStatus = structs.JobStatusRunning 3146 } 3147 3148 tuple := structs.NamespacedID{ 3149 ID: 
exist.JobID, 3150 Namespace: exist.Namespace, 3151 } 3152 jobs := map[structs.NamespacedID]string{tuple: forceStatus} 3153 3154 if err := s.setJobStatuses(index, txn, jobs, false); err != nil { 3155 return fmt.Errorf("setting job status failed: %v", err) 3156 } 3157 return nil 3158 } 3159 3160 // UpsertAllocs is used to evict a set of allocations and allocate new ones at 3161 // the same time. 3162 func (s *StateStore) UpsertAllocs(msgType structs.MessageType, index uint64, allocs []*structs.Allocation) error { 3163 txn := s.db.WriteTxn(index) 3164 defer txn.Abort() 3165 if err := s.upsertAllocsImpl(index, allocs, txn); err != nil { 3166 return err 3167 } 3168 return txn.Commit() 3169 } 3170 3171 // upsertAllocsImpl is the actual implementation of UpsertAllocs so that it may be 3172 // used with an existing transaction. 3173 func (s *StateStore) upsertAllocsImpl(index uint64, allocs []*structs.Allocation, txn *txn) error { 3174 // Handle the allocations 3175 jobs := make(map[structs.NamespacedID]string, 1) 3176 for _, alloc := range allocs { 3177 existing, err := txn.First("allocs", "id", alloc.ID) 3178 if err != nil { 3179 return fmt.Errorf("alloc lookup failed: %v", err) 3180 } 3181 exist, _ := existing.(*structs.Allocation) 3182 3183 if exist == nil { 3184 alloc.CreateIndex = index 3185 alloc.ModifyIndex = index 3186 alloc.AllocModifyIndex = index 3187 if alloc.DeploymentStatus != nil { 3188 alloc.DeploymentStatus.ModifyIndex = index 3189 } 3190 3191 // Issue https://github.com/hashicorp/nomad/issues/2583 uncovered 3192 // a race between a forced garbage collection and the scheduler 3193 // marking an allocation as terminal. The issue is that the 3194 // allocation from the scheduler has its job normalized and the FSM 3195 // will only denormalize if the allocation is not terminal. However 3196 // if the allocation is garbage collected, that will result in an 3197 // allocation being upserted for the first time without a job 3198 // attached. By returning an error here, it will cause the FSM to 3199 // error, causing the plan_apply to error and thus causing the 3200 // evaluation to be failed. This will force an index refresh that 3201 // should solve this issue. 3202 if alloc.Job == nil { 3203 return fmt.Errorf("attempting to upsert allocation %q without a job", alloc.ID) 3204 } 3205 } else { 3206 alloc.CreateIndex = exist.CreateIndex 3207 alloc.ModifyIndex = index 3208 alloc.AllocModifyIndex = index 3209 3210 // Keep the client's task states 3211 alloc.TaskStates = exist.TaskStates 3212 3213 // If the scheduler is marking this allocation as lost we do not 3214 // want to reuse the status of the existing allocation. 3215 if alloc.ClientStatus != structs.AllocClientStatusLost { 3216 alloc.ClientStatus = exist.ClientStatus 3217 alloc.ClientDescription = exist.ClientDescription 3218 } 3219 3220 // The job has been denormalized so re-attach the original job 3221 if alloc.Job == nil { 3222 alloc.Job = exist.Job 3223 } 3224 } 3225 3226 // OPTIMIZATION: 3227 // These should be given a map of new to old allocation and the updates 3228 // should be done on all changes.
The current implementation causes O(n) 3229 // lookups/copies/insertions rather than O(1) 3230 if err := s.updateDeploymentWithAlloc(index, alloc, exist, txn); err != nil { 3231 return fmt.Errorf("error updating deployment: %v", err) 3232 } 3233 3234 if err := s.updateSummaryWithAlloc(index, alloc, exist, txn); err != nil { 3235 return fmt.Errorf("error updating job summary: %v", err) 3236 } 3237 3238 if err := s.updateEntWithAlloc(index, alloc, exist, txn); err != nil { 3239 return err 3240 } 3241 3242 if err := s.updatePluginWithAlloc(index, alloc, txn); err != nil { 3243 return err 3244 } 3245 3246 if err := txn.Insert("allocs", alloc); err != nil { 3247 return fmt.Errorf("alloc insert failed: %v", err) 3248 } 3249 3250 if alloc.PreviousAllocation != "" { 3251 prevAlloc, err := txn.First("allocs", "id", alloc.PreviousAllocation) 3252 if err != nil { 3253 return fmt.Errorf("alloc lookup failed: %v", err) 3254 } 3255 existingPrevAlloc, _ := prevAlloc.(*structs.Allocation) 3256 if existingPrevAlloc != nil { 3257 prevAllocCopy := existingPrevAlloc.Copy() 3258 prevAllocCopy.NextAllocation = alloc.ID 3259 prevAllocCopy.ModifyIndex = index 3260 if err := txn.Insert("allocs", prevAllocCopy); err != nil { 3261 return fmt.Errorf("alloc insert failed: %v", err) 3262 } 3263 } 3264 } 3265 3266 // If the allocation is running, force the job to running status. 3267 forceStatus := "" 3268 if !alloc.TerminalStatus() { 3269 forceStatus = structs.JobStatusRunning 3270 } 3271 3272 tuple := structs.NamespacedID{ 3273 ID: alloc.JobID, 3274 Namespace: alloc.Namespace, 3275 } 3276 jobs[tuple] = forceStatus 3277 } 3278 3279 // Update the indexes 3280 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 3281 return fmt.Errorf("index update failed: %v", err) 3282 } 3283 3284 // Set the job's status 3285 if err := s.setJobStatuses(index, txn, jobs, false); err != nil { 3286 return fmt.Errorf("setting job status failed: %v", err) 3287 } 3288 3289 return nil 3290 } 3291 3292 // UpdateAllocsDesiredTransitions is used to update a set of allocations 3293 // desired transitions. 
3294 func (s *StateStore) UpdateAllocsDesiredTransitions(msgType structs.MessageType, index uint64, allocs map[string]*structs.DesiredTransition, 3295 evals []*structs.Evaluation) error { 3296 3297 txn := s.db.WriteTxnMsgT(msgType, index) 3298 defer txn.Abort() 3299 3300 // Handle each of the updated allocations 3301 for id, transition := range allocs { 3302 if err := s.nestedUpdateAllocDesiredTransition(txn, index, id, transition); err != nil { 3303 return err 3304 } 3305 } 3306 3307 for _, eval := range evals { 3308 if err := s.nestedUpsertEval(txn, index, eval); err != nil { 3309 return err 3310 } 3311 } 3312 3313 // Update the indexes 3314 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 3315 return fmt.Errorf("index update failed: %v", err) 3316 } 3317 3318 return txn.Commit() 3319 } 3320 3321 // nestedUpdateAllocDesiredTransition is used to nest an update of an 3322 // allocation's desired transition 3323 func (s *StateStore) nestedUpdateAllocDesiredTransition( 3324 txn *txn, index uint64, allocID string, 3325 transition *structs.DesiredTransition) error { 3326 3327 // Look for existing alloc 3328 existing, err := txn.First("allocs", "id", allocID) 3329 if err != nil { 3330 return fmt.Errorf("alloc lookup failed: %v", err) 3331 } 3332 3333 // Nothing to do if this does not exist 3334 if existing == nil { 3335 return nil 3336 } 3337 exist := existing.(*structs.Allocation) 3338 3339 // Copy everything from the existing allocation 3340 copyAlloc := exist.Copy() 3341 3342 // Merge the desired transitions 3343 copyAlloc.DesiredTransition.Merge(transition) 3344 3345 // Update the modify index 3346 copyAlloc.ModifyIndex = index 3347 3348 // Update the allocation 3349 if err := txn.Insert("allocs", copyAlloc); err != nil { 3350 return fmt.Errorf("alloc insert failed: %v", err) 3351 } 3352 3353 return nil 3354 } 3355 3356 // AllocByID is used to lookup an allocation by its ID 3357 func (s *StateStore) AllocByID(ws memdb.WatchSet, id string) (*structs.Allocation, error) { 3358 txn := s.db.ReadTxn() 3359 return s.allocByIDImpl(txn, ws, id) 3360 } 3361 3362 // allocByIDImpl retrieves an allocation and is called under an existing 3363 // transaction. An optional watch set can be passed to add allocations to the 3364 // watch set 3365 func (s *StateStore) allocByIDImpl(txn Txn, ws memdb.WatchSet, id string) (*structs.Allocation, error) { 3366 watchCh, raw, err := txn.FirstWatch("allocs", "id", id) 3367 if err != nil { 3368 return nil, fmt.Errorf("alloc lookup failed: %v", err) 3369 } 3370 3371 ws.Add(watchCh) 3372 3373 if raw == nil { 3374 return nil, nil 3375 } 3376 alloc := raw.(*structs.Allocation) 3377 return alloc, nil 3378 } 3379 3380 // AllocsByIDPrefix is used to lookup allocs by prefix 3381 func (s *StateStore) AllocsByIDPrefix(ws memdb.WatchSet, namespace, id string) (memdb.ResultIterator, error) { 3382 txn := s.db.ReadTxn() 3383 3384 iter, err := txn.Get("allocs", "id_prefix", id) 3385 if err != nil { 3386 return nil, fmt.Errorf("alloc lookup failed: %v", err) 3387 } 3388 3389 ws.Add(iter.WatchCh()) 3390 3391 // Wrap the iterator in a filter 3392 wrap := memdb.NewFilterIterator(iter, allocNamespaceFilter(namespace)) 3393 return wrap, nil 3394 } 3395
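// Editor's example (sketch, not part of the original file): asking the
// scheduler to migrate one allocation. The transition map and follow-up eval
// are applied in a single write, as UpdateAllocsDesiredTransitions above
// requires; the message type used here is an assumption.
func migrateAllocExample(s *StateStore, index uint64, allocID string, eval *structs.Evaluation) error {
	transitions := map[string]*structs.DesiredTransition{
		allocID: {Migrate: helper.BoolToPtr(true)},
	}
	return s.UpdateAllocsDesiredTransitions(structs.AllocUpdateDesiredTransitionRequestType,
		index, transitions, []*structs.Evaluation{eval})
}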
3396 // allocNamespaceFilter returns a filter function that filters all allocations 3397 // not in the given namespace. 3398 func allocNamespaceFilter(namespace string) func(interface{}) bool { 3399 return func(raw interface{}) bool { 3400 alloc, ok := raw.(*structs.Allocation) 3401 if !ok { 3402 return true 3403 } 3404 3405 return alloc.Namespace != namespace 3406 } 3407 } 3408 3409 // AllocsByIDPrefixAllNSs is used to lookup allocs by prefix across all namespaces 3410 func (s *StateStore) AllocsByIDPrefixAllNSs(ws memdb.WatchSet, prefix string) (memdb.ResultIterator, error) { 3411 txn := s.db.ReadTxn() 3412 3413 iter, err := txn.Get("allocs", "id_prefix", prefix) 3414 if err != nil { 3415 return nil, fmt.Errorf("alloc lookup failed: %v", err) 3416 } 3417 3418 ws.Add(iter.WatchCh()) 3419 3420 return iter, nil 3421 } 3422 3423 // AllocsByNode returns all the allocations by node 3424 func (s *StateStore) AllocsByNode(ws memdb.WatchSet, node string) ([]*structs.Allocation, error) { 3425 txn := s.db.ReadTxn() 3426 3427 return allocsByNodeTxn(txn, ws, node) 3428 } 3429 3430 func allocsByNodeTxn(txn ReadTxn, ws memdb.WatchSet, node string) ([]*structs.Allocation, error) { 3431 // Get an iterator over the node allocations, using only the 3432 // node prefix which ignores the terminal status 3433 iter, err := txn.Get("allocs", "node_prefix", node) 3434 if err != nil { 3435 return nil, err 3436 } 3437 3438 ws.Add(iter.WatchCh()) 3439 3440 var out []*structs.Allocation 3441 for { 3442 raw := iter.Next() 3443 if raw == nil { 3444 break 3445 } 3446 out = append(out, raw.(*structs.Allocation)) 3447 } 3448 return out, nil 3449 } 3450 3451 // AllocsByNodeTerminal returns all the allocations by node and terminal status 3452 func (s *StateStore) AllocsByNodeTerminal(ws memdb.WatchSet, node string, terminal bool) ([]*structs.Allocation, error) { 3453 txn := s.db.ReadTxn() 3454 3455 // Get an iterator over the node allocations 3456 iter, err := txn.Get("allocs", "node", node, terminal) 3457 if err != nil { 3458 return nil, err 3459 } 3460 3461 ws.Add(iter.WatchCh()) 3462 3463 var out []*structs.Allocation 3464 for { 3465 raw := iter.Next() 3466 if raw == nil { 3467 break 3468 } 3469 out = append(out, raw.(*structs.Allocation)) 3470 } 3471 return out, nil 3472 } 3473 3474 // AllocsByJob returns allocations by job id 3475 func (s *StateStore) AllocsByJob(ws memdb.WatchSet, namespace, jobID string, anyCreateIndex bool) ([]*structs.Allocation, error) { 3476 txn := s.db.ReadTxn() 3477 3478 // Get the job 3479 var job *structs.Job 3480 rawJob, err := txn.First("jobs", "id", namespace, jobID) 3481 if err != nil { 3482 return nil, err 3483 } 3484 if rawJob != nil { 3485 job = rawJob.(*structs.Job) 3486 } 3487 3488 // Get an iterator over the job allocations 3489 iter, err := txn.Get("allocs", "job", namespace, jobID) 3490 if err != nil { 3491 return nil, err 3492 } 3493 3494 ws.Add(iter.WatchCh()) 3495 3496 var out []*structs.Allocation 3497 for { 3498 raw := iter.Next() 3499 if raw == nil { 3500 break 3501 } 3502 3503 alloc := raw.(*structs.Allocation) 3504 // If the allocation belongs to a job with the same ID but a different 3505 // create index and we are not getting all the allocations whose Job 3506 // matches the same Job ID then we skip it 3507 if !anyCreateIndex && job != nil && alloc.Job.CreateIndex != job.CreateIndex { 3508 continue 3509 } 3510 out = append(out, raw.(*structs.Allocation)) 3511 } 3512 return out, nil 3513 } 3514 3515 // AllocsByEval returns all the allocations by eval id 3516 func (s *StateStore) AllocsByEval(ws memdb.WatchSet, evalID string) ([]*structs.Allocation, error) { 3517 txn := s.db.ReadTxn() 3518 3519 // Get an iterator over the 
eval allocations 3520 iter, err := txn.Get("allocs", "eval", evalID) 3521 if err != nil { 3522 return nil, err 3523 } 3524 3525 ws.Add(iter.WatchCh()) 3526 3527 var out []*structs.Allocation 3528 for { 3529 raw := iter.Next() 3530 if raw == nil { 3531 break 3532 } 3533 out = append(out, raw.(*structs.Allocation)) 3534 } 3535 return out, nil 3536 } 3537 3538 // AllocsByDeployment returns all the allocations by deployment id 3539 func (s *StateStore) AllocsByDeployment(ws memdb.WatchSet, deploymentID string) ([]*structs.Allocation, error) { 3540 txn := s.db.ReadTxn() 3541 3542 // Get an iterator over the deployments allocations 3543 iter, err := txn.Get("allocs", "deployment", deploymentID) 3544 if err != nil { 3545 return nil, err 3546 } 3547 3548 ws.Add(iter.WatchCh()) 3549 3550 var out []*structs.Allocation 3551 for { 3552 raw := iter.Next() 3553 if raw == nil { 3554 break 3555 } 3556 out = append(out, raw.(*structs.Allocation)) 3557 } 3558 return out, nil 3559 } 3560 3561 // Allocs returns an iterator over all the allocations 3562 func (s *StateStore) Allocs(ws memdb.WatchSet) (memdb.ResultIterator, error) { 3563 txn := s.db.ReadTxn() 3564 3565 // Walk the entire table 3566 iter, err := txn.Get("allocs", "id") 3567 if err != nil { 3568 return nil, err 3569 } 3570 3571 ws.Add(iter.WatchCh()) 3572 3573 return iter, nil 3574 } 3575 3576 // AllocsByNamespace returns an iterator over all the allocations in the 3577 // namespace 3578 func (s *StateStore) AllocsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) { 3579 txn := s.db.ReadTxn() 3580 return s.allocsByNamespaceImpl(ws, txn, namespace) 3581 } 3582 3583 // allocsByNamespaceImpl returns an iterator over all the allocations in the 3584 // namespace 3585 func (s *StateStore) allocsByNamespaceImpl(ws memdb.WatchSet, txn *txn, namespace string) (memdb.ResultIterator, error) { 3586 // Walk the entire table 3587 iter, err := txn.Get("allocs", "namespace", namespace) 3588 if err != nil { 3589 return nil, err 3590 } 3591 3592 ws.Add(iter.WatchCh()) 3593 3594 return iter, nil 3595 } 3596 3597 // UpsertVaultAccessor is used to register a set of Vault accessors 3598 func (s *StateStore) UpsertVaultAccessor(index uint64, accessors []*structs.VaultAccessor) error { 3599 txn := s.db.WriteTxn(index) 3600 defer txn.Abort() 3601 3602 for _, accessor := range accessors { 3603 // Set the create index 3604 accessor.CreateIndex = index 3605 3606 // Insert the accessor 3607 if err := txn.Insert("vault_accessors", accessor); err != nil { 3608 return fmt.Errorf("accessor insert failed: %v", err) 3609 } 3610 } 3611 3612 if err := txn.Insert("index", &IndexEntry{"vault_accessors", index}); err != nil { 3613 return fmt.Errorf("index update failed: %v", err) 3614 } 3615 3616 return txn.Commit() 3617 } 3618 3619 // DeleteVaultAccessors is used to delete a set of Vault Accessors 3620 func (s *StateStore) DeleteVaultAccessors(index uint64, accessors []*structs.VaultAccessor) error { 3621 txn := s.db.WriteTxn(index) 3622 defer txn.Abort() 3623 3624 // Lookup each accessor 3625 for _, accessor := range accessors { 3626 // Delete the accessor 3627 if err := txn.Delete("vault_accessors", accessor); err != nil { 3628 return fmt.Errorf("accessor delete failed: %v", err) 3629 } 3630 } 3631 3632 if err := txn.Insert("index", &IndexEntry{"vault_accessors", index}); err != nil { 3633 return fmt.Errorf("index update failed: %v", err) 3634 } 3635 3636 return txn.Commit() 3637 } 3638 3639 // VaultAccessor returns the given Vault accessor 3640 func (s 
*StateStore) VaultAccessor(ws memdb.WatchSet, accessor string) (*structs.VaultAccessor, error) { 3641 txn := s.db.ReadTxn() 3642 3643 watchCh, existing, err := txn.FirstWatch("vault_accessors", "id", accessor) 3644 if err != nil { 3645 return nil, fmt.Errorf("accessor lookup failed: %v", err) 3646 } 3647 3648 ws.Add(watchCh) 3649 3650 if existing != nil { 3651 return existing.(*structs.VaultAccessor), nil 3652 } 3653 3654 return nil, nil 3655 } 3656 3657 // VaultAccessors returns an iterator of Vault accessors. 3658 func (s *StateStore) VaultAccessors(ws memdb.WatchSet) (memdb.ResultIterator, error) { 3659 txn := s.db.ReadTxn() 3660 3661 iter, err := txn.Get("vault_accessors", "id") 3662 if err != nil { 3663 return nil, err 3664 } 3665 3666 ws.Add(iter.WatchCh()) 3667 3668 return iter, nil 3669 } 3670 3671 // VaultAccessorsByAlloc returns all the Vault accessors by alloc id 3672 func (s *StateStore) VaultAccessorsByAlloc(ws memdb.WatchSet, allocID string) ([]*structs.VaultAccessor, error) { 3673 txn := s.db.ReadTxn() 3674 3675 // Get an iterator over the accessors 3676 iter, err := txn.Get("vault_accessors", "alloc_id", allocID) 3677 if err != nil { 3678 return nil, err 3679 } 3680 3681 ws.Add(iter.WatchCh()) 3682 3683 var out []*structs.VaultAccessor 3684 for { 3685 raw := iter.Next() 3686 if raw == nil { 3687 break 3688 } 3689 out = append(out, raw.(*structs.VaultAccessor)) 3690 } 3691 return out, nil 3692 } 3693 3694 // VaultAccessorsByNode returns all the Vault accessors by node id 3695 func (s *StateStore) VaultAccessorsByNode(ws memdb.WatchSet, nodeID string) ([]*structs.VaultAccessor, error) { 3696 txn := s.db.ReadTxn() 3697 3698 // Get an iterator over the accessors 3699 iter, err := txn.Get("vault_accessors", "node_id", nodeID) 3700 if err != nil { 3701 return nil, err 3702 } 3703 3704 ws.Add(iter.WatchCh()) 3705 3706 var out []*structs.VaultAccessor 3707 for { 3708 raw := iter.Next() 3709 if raw == nil { 3710 break 3711 } 3712 out = append(out, raw.(*structs.VaultAccessor)) 3713 } 3714 return out, nil 3715 } 3716 3717 func indexEntry(table string, index uint64) *IndexEntry { 3718 return &IndexEntry{ 3719 Key: table, 3720 Value: index, 3721 } 3722 } 3723 3724 const siTokenAccessorTable = "si_token_accessors" 3725 3726 // UpsertSITokenAccessors is used to register a set of Service Identity token accessors. 3727 func (s *StateStore) UpsertSITokenAccessors(index uint64, accessors []*structs.SITokenAccessor) error { 3728 txn := s.db.WriteTxn(index) 3729 defer txn.Abort() 3730 3731 for _, accessor := range accessors { 3732 // set the create index 3733 accessor.CreateIndex = index 3734 3735 // insert the accessor 3736 if err := txn.Insert(siTokenAccessorTable, accessor); err != nil { 3737 return errors.Wrap(err, "accessor insert failed") 3738 } 3739 } 3740 3741 // update the index for this table 3742 if err := txn.Insert("index", indexEntry(siTokenAccessorTable, index)); err != nil { 3743 return errors.Wrap(err, "index update failed") 3744 } 3745 3746 return txn.Commit() 3747 } 3748 3749 // DeleteSITokenAccessors is used to delete a set of Service Identity token accessors. 
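//
// A hypothetical usage sketch (identifiers such as store, raftIndex, and
// revoked are invented for illustration): once the Consul tokens behind a
// set of accessors have been revoked, the accessors are removed from state
// in a single indexed transaction:
//
//	revoked := []*structs.SITokenAccessor{{AccessorID: accessorID, AllocID: allocID}}
//	if err := store.DeleteSITokenAccessors(raftIndex, revoked); err != nil {
//		return err // the deferred Abort discards any partial deletes
//	}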
3750 func (s *StateStore) DeleteSITokenAccessors(index uint64, accessors []*structs.SITokenAccessor) error { 3751 txn := s.db.WriteTxn(index) 3752 defer txn.Abort() 3753 3754 // Lookup each accessor 3755 for _, accessor := range accessors { 3756 // Delete the accessor 3757 if err := txn.Delete(siTokenAccessorTable, accessor); err != nil { 3758 return errors.Wrap(err, "accessor delete failed") 3759 } 3760 } 3761 3762 // update the index for this table 3763 if err := txn.Insert("index", indexEntry(siTokenAccessorTable, index)); err != nil { 3764 return errors.Wrap(err, "index update failed") 3765 } 3766 3767 return txn.Commit() 3768 } 3769 3770 // SITokenAccessor returns the given Service Identity token accessor. 3771 func (s *StateStore) SITokenAccessor(ws memdb.WatchSet, accessorID string) (*structs.SITokenAccessor, error) { 3772 txn := s.db.ReadTxn() 3773 defer txn.Abort() 3774 3775 watchCh, existing, err := txn.FirstWatch(siTokenAccessorTable, "id", accessorID) 3776 if err != nil { 3777 return nil, errors.Wrap(err, "accessor lookup failed") 3778 } 3779 3780 ws.Add(watchCh) 3781 3782 if existing != nil { 3783 return existing.(*structs.SITokenAccessor), nil 3784 } 3785 3786 return nil, nil 3787 } 3788 3789 // SITokenAccessors returns an iterator of Service Identity token accessors. 3790 func (s *StateStore) SITokenAccessors(ws memdb.WatchSet) (memdb.ResultIterator, error) { 3791 txn := s.db.ReadTxn() 3792 defer txn.Abort() 3793 3794 iter, err := txn.Get(siTokenAccessorTable, "id") 3795 if err != nil { 3796 return nil, err 3797 } 3798 3799 ws.Add(iter.WatchCh()) 3800 3801 return iter, nil 3802 } 3803 3804 // SITokenAccessorsByAlloc returns all the Service Identity token accessors by alloc ID. 3805 func (s *StateStore) SITokenAccessorsByAlloc(ws memdb.WatchSet, allocID string) ([]*structs.SITokenAccessor, error) { 3806 txn := s.db.ReadTxn() 3807 defer txn.Abort() 3808 3809 // Get an iterator over the accessors 3810 iter, err := txn.Get(siTokenAccessorTable, "alloc_id", allocID) 3811 if err != nil { 3812 return nil, err 3813 } 3814 3815 ws.Add(iter.WatchCh()) 3816 3817 var result []*structs.SITokenAccessor 3818 for raw := iter.Next(); raw != nil; raw = iter.Next() { 3819 result = append(result, raw.(*structs.SITokenAccessor)) 3820 } 3821 3822 return result, nil 3823 } 3824 3825 // SITokenAccessorsByNode returns all the Service Identity token accessors by node ID. 
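//
// A minimal caller sketch (store and nodeID are assumed names): gathering the
// accessors that would need revocation when a node is garbage collected:
//
//	ws := memdb.NewWatchSet()
//	accessors, err := store.SITokenAccessorsByNode(ws, nodeID)
//	if err != nil {
//		return err
//	}
//	// accessors now holds every SI token accessor for allocs on that node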
3826 func (s *StateStore) SITokenAccessorsByNode(ws memdb.WatchSet, nodeID string) ([]*structs.SITokenAccessor, error) { 3827 txn := s.db.ReadTxn() 3828 defer txn.Abort() 3829 3830 // Get an iterator over the accessors 3831 iter, err := txn.Get(siTokenAccessorTable, "node_id", nodeID) 3832 if err != nil { 3833 return nil, err 3834 } 3835 3836 ws.Add(iter.WatchCh()) 3837 3838 var result []*structs.SITokenAccessor 3839 for raw := iter.Next(); raw != nil; raw = iter.Next() { 3840 result = append(result, raw.(*structs.SITokenAccessor)) 3841 } 3842 3843 return result, nil 3844 } 3845 3846 // UpdateDeploymentStatus is used to make deployment status updates and 3847 // potentially create an evaluation 3848 func (s *StateStore) UpdateDeploymentStatus(msgType structs.MessageType, index uint64, req *structs.DeploymentStatusUpdateRequest) error { 3849 txn := s.db.WriteTxnMsgT(msgType, index) 3850 defer txn.Abort() 3851 3852 if err := s.updateDeploymentStatusImpl(index, req.DeploymentUpdate, txn); err != nil { 3853 return err 3854 } 3855 3856 // Upsert the job if necessary 3857 if req.Job != nil { 3858 if err := s.upsertJobImpl(index, req.Job, false, txn); err != nil { 3859 return err 3860 } 3861 } 3862 3863 // Upsert the optional eval 3864 if req.Eval != nil { 3865 if err := s.nestedUpsertEval(txn, index, req.Eval); err != nil { 3866 return err 3867 } 3868 } 3869 3870 return txn.Commit() 3871 } 3872 3873 // updateDeploymentStatusImpl is used to make deployment status updates 3874 func (s *StateStore) updateDeploymentStatusImpl(index uint64, u *structs.DeploymentStatusUpdate, txn *txn) error { 3875 // Retrieve deployment 3876 ws := memdb.NewWatchSet() 3877 deployment, err := s.deploymentByIDImpl(ws, u.DeploymentID, txn) 3878 if err != nil { 3879 return err 3880 } else if deployment == nil { 3881 return fmt.Errorf("Deployment ID %q couldn't be updated as it does not exist", u.DeploymentID) 3882 } else if !deployment.Active() { 3883 return fmt.Errorf("Deployment %q has terminal status %q", deployment.ID, deployment.Status) 3884 } 3885 3886 // Apply the new status 3887 copy := deployment.Copy() 3888 copy.Status = u.Status 3889 copy.StatusDescription = u.StatusDescription 3890 copy.ModifyIndex = index 3891 3892 // Insert the deployment 3893 if err := txn.Insert("deployment", copy); err != nil { 3894 return err 3895 } 3896 3897 // Update the index 3898 if err := txn.Insert("index", &IndexEntry{"deployment", index}); err != nil { 3899 return fmt.Errorf("index update failed: %v", err) 3900 } 3901 3902 // If the deployment is being marked as complete, set the job to stable. 3903 if copy.Status == structs.DeploymentStatusSuccessful { 3904 if err := s.updateJobStabilityImpl(index, copy.Namespace, copy.JobID, copy.JobVersion, true, txn); err != nil { 3905 return fmt.Errorf("failed to update job stability: %v", err) 3906 } 3907 } 3908 3909 return nil 3910 } 3911 3912 // UpdateJobStability updates the stability of the given job and version to the 3913 // desired status.
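//
// A hedged example of the intent (store, raftIndex, and the job values are
// invented): after version 3 of job "web" deploys successfully, it is marked
// stable so a later deployment can auto-revert to it:
//
//	if err := store.UpdateJobStability(raftIndex, "default", "web", 3, true); err != nil {
//		return err
//	}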
3914 func (s *StateStore) UpdateJobStability(index uint64, namespace, jobID string, jobVersion uint64, stable bool) error { 3915 txn := s.db.WriteTxn(index) 3916 defer txn.Abort() 3917 3918 if err := s.updateJobStabilityImpl(index, namespace, jobID, jobVersion, stable, txn); err != nil { 3919 return err 3920 } 3921 3922 return txn.Commit() 3923 } 3924 3925 // updateJobStabilityImpl updates the stability of the given job and version 3926 func (s *StateStore) updateJobStabilityImpl(index uint64, namespace, jobID string, jobVersion uint64, stable bool, txn *txn) error { 3927 // Get the job that is referenced 3928 job, err := s.jobByIDAndVersionImpl(nil, namespace, jobID, jobVersion, txn) 3929 if err != nil { 3930 return err 3931 } 3932 3933 // Has already been cleared, nothing to do 3934 if job == nil { 3935 return nil 3936 } 3937 3938 // If the job already has the desired stability, nothing to do 3939 if job.Stable == stable { 3940 return nil 3941 } 3942 3943 copy := job.Copy() 3944 copy.Stable = stable 3945 return s.upsertJobImpl(index, copy, true, txn) 3946 } 3947 3948 // UpdateDeploymentPromotion is used to promote canaries in a deployment and 3949 // potentially create an evaluation 3950 func (s *StateStore) UpdateDeploymentPromotion(msgType structs.MessageType, index uint64, req *structs.ApplyDeploymentPromoteRequest) error { 3951 txn := s.db.WriteTxnMsgT(msgType, index) 3952 defer txn.Abort() 3953 3954 // Retrieve the deployment and ensure it is active 3955 ws := memdb.NewWatchSet() 3956 deployment, err := s.deploymentByIDImpl(ws, req.DeploymentID, txn) 3957 if err != nil { 3958 return err 3959 } else if deployment == nil { 3960 return fmt.Errorf("Deployment ID %q couldn't be updated as it does not exist", req.DeploymentID) 3961 } else if !deployment.Active() { 3962 return fmt.Errorf("Deployment %q has terminal status %q", deployment.ID, deployment.Status) 3963 } 3964 3965 // Retrieve the affected allocations 3966 iter, err := txn.Get("allocs", "deployment", req.DeploymentID) 3967 if err != nil { 3968 return err 3969 } 3970 3971 // groupIndex is a map of groups being promoted 3972 groupIndex := make(map[string]struct{}, len(req.Groups)) 3973 for _, g := range req.Groups { 3974 groupIndex[g] = struct{}{} 3975 } 3976 3977 // canaryIndex is the set of placed canaries in the deployment 3978 canaryIndex := make(map[string]struct{}, len(deployment.TaskGroups)) 3979 for _, dstate := range deployment.TaskGroups { 3980 for _, c := range dstate.PlacedCanaries { 3981 canaryIndex[c] = struct{}{} 3982 } 3983 } 3984 3985 // healthyCounts is a mapping of group to the number of healthy canaries 3986 healthyCounts := make(map[string]int, len(deployment.TaskGroups)) 3987 3988 // promotable is the set of allocations that we can move from canary to 3989 // non-canary 3990 var promotable []*structs.Allocation 3991 3992 for { 3993 raw := iter.Next() 3994 if raw == nil { 3995 break 3996 } 3997 3998 alloc := raw.(*structs.Allocation) 3999 4000 // Check that the alloc is a canary 4001 if _, ok := canaryIndex[alloc.ID]; !ok { 4002 continue 4003 } 4004 4005 // Check that the canary is part of a group being promoted 4006 if _, ok := groupIndex[alloc.TaskGroup]; !req.All && !ok { 4007 continue 4008 } 4009 4010 // Ensure the canaries are healthy 4011 if alloc.TerminalStatus() || !alloc.DeploymentStatus.IsHealthy() { 4012 continue 4013 } 4014 4015 healthyCounts[alloc.TaskGroup]++ 4016 promotable = append(promotable, alloc) 4017 } 4018 4019 // Determine if we have enough healthy allocations 4020 var
unhealthyErr multierror.Error 4021 for tg, dstate := range deployment.TaskGroups { 4022 if _, ok := groupIndex[tg]; !req.All && !ok { 4023 continue 4024 } 4025 4026 need := dstate.DesiredCanaries 4027 if need == 0 { 4028 continue 4029 } 4030 4031 if have := healthyCounts[tg]; have < need { 4032 multierror.Append(&unhealthyErr, fmt.Errorf("Task group %q has %d/%d healthy allocations", tg, have, need)) 4033 } 4034 } 4035 4036 if err := unhealthyErr.ErrorOrNil(); err != nil { 4037 return err 4038 } 4039 4040 // Update deployment 4041 copy := deployment.Copy() 4042 copy.ModifyIndex = index 4043 for tg, status := range copy.TaskGroups { 4044 _, ok := groupIndex[tg] 4045 if !req.All && !ok { 4046 continue 4047 } 4048 4049 // reset the progress deadline 4050 if status.ProgressDeadline > 0 && !status.RequireProgressBy.IsZero() { 4051 status.RequireProgressBy = time.Now().Add(status.ProgressDeadline) 4052 } 4053 status.Promoted = true 4054 } 4055 4056 // If the deployment no longer needs promotion, update its status 4057 if !copy.RequiresPromotion() && copy.Status == structs.DeploymentStatusRunning { 4058 copy.StatusDescription = structs.DeploymentStatusDescriptionRunning 4059 } 4060 4061 // Insert the deployment 4062 if err := s.upsertDeploymentImpl(index, copy, txn); err != nil { 4063 return err 4064 } 4065 4066 // Upsert the optional eval 4067 if req.Eval != nil { 4068 if err := s.nestedUpsertEval(txn, index, req.Eval); err != nil { 4069 return err 4070 } 4071 } 4072 4073 // For each promotable allocation remove the canary field 4074 for _, alloc := range promotable { 4075 promoted := alloc.Copy() 4076 promoted.DeploymentStatus.Canary = false 4077 promoted.DeploymentStatus.ModifyIndex = index 4078 promoted.ModifyIndex = index 4079 promoted.AllocModifyIndex = index 4080 4081 if err := txn.Insert("allocs", promoted); err != nil { 4082 return fmt.Errorf("alloc insert failed: %v", err) 4083 } 4084 } 4085 4086 // Update the alloc index 4087 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 4088 return fmt.Errorf("index update failed: %v", err) 4089 } 4090 4091 return txn.Commit() 4092 } 4093 4094 // UpdateDeploymentAllocHealth is used to update the health of allocations as 4095 // part of the deployment and potentially create an evaluation 4096 func (s *StateStore) UpdateDeploymentAllocHealth(msgType structs.MessageType, index uint64, req *structs.ApplyDeploymentAllocHealthRequest) error { 4097 txn := s.db.WriteTxnMsgT(msgType, index) 4098 defer txn.Abort() 4099 4100 // Retrieve the deployment and ensure it is active 4101 ws := memdb.NewWatchSet() 4102 deployment, err := s.deploymentByIDImpl(ws, req.DeploymentID, txn) 4103 if err != nil { 4104 return err 4105 } else if deployment == nil { 4106 return fmt.Errorf("Deployment ID %q couldn't be updated as it does not exist", req.DeploymentID) 4107 } else if !deployment.Active() { 4108 return fmt.Errorf("Deployment %q has terminal status %q", deployment.ID, deployment.Status) 4109 } 4110 4111 // Update the health status of each allocation 4112 if total := len(req.HealthyAllocationIDs) + len(req.UnhealthyAllocationIDs); total != 0 { 4113 setAllocHealth := func(id string, healthy bool, ts time.Time) error { 4114 existing, err := txn.First("allocs", "id", id) 4115 if err != nil { 4116 return fmt.Errorf("alloc %q lookup failed: %v", id, err) 4117 } 4118 if existing == nil { 4119 return fmt.Errorf("unknown alloc %q", id) 4120 } 4121 4122 old := existing.(*structs.Allocation) 4123 if old.DeploymentID != req.DeploymentID {
4124 return fmt.Errorf("alloc %q is not part of deployment %q", id, req.DeploymentID) 4125 } 4126 4127 // Set the health 4128 copy := old.Copy() 4129 if copy.DeploymentStatus == nil { 4130 copy.DeploymentStatus = &structs.AllocDeploymentStatus{} 4131 } 4132 copy.DeploymentStatus.Healthy = helper.BoolToPtr(healthy) 4133 copy.DeploymentStatus.Timestamp = ts 4134 copy.DeploymentStatus.ModifyIndex = index 4135 copy.ModifyIndex = index 4136 4137 if err := s.updateDeploymentWithAlloc(index, copy, old, txn); err != nil { 4138 return fmt.Errorf("error updating deployment: %v", err) 4139 } 4140 4141 if err := txn.Insert("allocs", copy); err != nil { 4142 return fmt.Errorf("alloc insert failed: %v", err) 4143 } 4144 4145 return nil 4146 } 4147 4148 for _, id := range req.HealthyAllocationIDs { 4149 if err := setAllocHealth(id, true, req.Timestamp); err != nil { 4150 return err 4151 } 4152 } 4153 for _, id := range req.UnhealthyAllocationIDs { 4154 if err := setAllocHealth(id, false, req.Timestamp); err != nil { 4155 return err 4156 } 4157 } 4158 4159 // Update the indexes 4160 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 4161 return fmt.Errorf("index update failed: %v", err) 4162 } 4163 } 4164 4165 // Update the deployment status as needed. 4166 if req.DeploymentUpdate != nil { 4167 if err := s.updateDeploymentStatusImpl(index, req.DeploymentUpdate, txn); err != nil { 4168 return err 4169 } 4170 } 4171 4172 // Upsert the job if necessary 4173 if req.Job != nil { 4174 if err := s.upsertJobImpl(index, req.Job, false, txn); err != nil { 4175 return err 4176 } 4177 } 4178 4179 // Upsert the optional eval 4180 if req.Eval != nil { 4181 if err := s.nestedUpsertEval(txn, index, req.Eval); err != nil { 4182 return err 4183 } 4184 } 4185 4186 return txn.Commit() 4187 } 4188 4189 // LatestIndex returns the greatest index value for all indexes 4190 func (s *StateStore) LatestIndex() (uint64, error) { 4191 indexes, err := s.Indexes() 4192 if err != nil { 4193 return 0, err 4194 } 4195 4196 var max uint64 = 0 4197 for { 4198 raw := indexes.Next() 4199 if raw == nil { 4200 break 4201 } 4202 4203 // Cast to an index entry 4204 idx := raw.(*IndexEntry) 4205 4206 // Determine the max 4207 if idx.Value > max { 4208 max = idx.Value 4209 } 4210 } 4211 4212 return max, nil 4213 } 4214 4215 // Index finds the matching index value 4216 func (s *StateStore) Index(name string) (uint64, error) { 4217 txn := s.db.ReadTxn() 4218 4219 // Lookup the first matching index 4220 out, err := txn.First("index", "id", name) 4221 if err != nil { 4222 return 0, err 4223 } 4224 if out == nil { 4225 return 0, nil 4226 } 4227 return out.(*IndexEntry).Value, nil 4228 } 4229 4230 // Indexes returns an iterator over all the indexes 4231 func (s *StateStore) Indexes() (memdb.ResultIterator, error) { 4232 txn := s.db.ReadTxn() 4233 4234 // Walk the entire index table 4235 iter, err := txn.Get("index", "id") 4236 if err != nil { 4237 return nil, err 4238 } 4239 return iter, nil 4240 } 4241 4242 // ReconcileJobSummaries re-creates summaries for all jobs present in the state 4243 // store 4244 func (s *StateStore) ReconcileJobSummaries(index uint64) error { 4245 txn := s.db.WriteTxn(index) 4246 defer txn.Abort() 4247 4248 // Get all the jobs 4249 iter, err := txn.Get("jobs", "id") 4250 if err != nil { 4251 return err 4252 } 4253 // COMPAT: Remove after 0.11 4254 // Iterate over jobs to build a list of parent jobs and their children 4255 parentMap := make(map[string][]*structs.Job) 4256 for { 4257 rawJob := iter.Next()
4258 if rawJob == nil { 4259 break 4260 } 4261 job := rawJob.(*structs.Job) 4262 if job.ParentID != "" { 4263 children := parentMap[job.ParentID] 4264 children = append(children, job) 4265 parentMap[job.ParentID] = children 4266 } 4267 } 4268 4269 // Get all the jobs again 4270 iter, err = txn.Get("jobs", "id") 4271 if err != nil { 4272 return err 4273 } 4274 4275 for { 4276 rawJob := iter.Next() 4277 if rawJob == nil { 4278 break 4279 } 4280 job := rawJob.(*structs.Job) 4281 4282 if job.IsParameterized() || job.IsPeriodic() { 4283 // COMPAT: Remove after 0.11 4284 4285 // The following block of code fixes incorrect child summaries due to a bug 4286 // See https://github.com/hashicorp/nomad/issues/3886 for details 4287 rawSummary, err := txn.First("job_summary", "id", job.Namespace, job.ID) 4288 if err != nil { 4289 return err 4290 } 4291 if rawSummary == nil { 4292 continue 4293 } 4294 4295 oldSummary := rawSummary.(*structs.JobSummary) 4296 4297 // Create an empty summary 4298 summary := &structs.JobSummary{ 4299 JobID: job.ID, 4300 Namespace: job.Namespace, 4301 Summary: make(map[string]structs.TaskGroupSummary), 4302 Children: &structs.JobChildrenSummary{}, 4303 } 4304 4305 // Iterate over the children of this job, if any, to fix summary counts 4306 children := parentMap[job.ID] 4307 for _, childJob := range children { 4308 switch childJob.Status { 4309 case structs.JobStatusPending: 4310 summary.Children.Pending++ 4311 case structs.JobStatusDead: 4312 summary.Children.Dead++ 4313 case structs.JobStatusRunning: 4314 summary.Children.Running++ 4315 } 4316 } 4317 4318 // Insert the job summary if it's different 4319 if !reflect.DeepEqual(summary, oldSummary) { 4320 // Set the create index of the summary same as the job's create index 4321 // and the modify index to the current index 4322 summary.CreateIndex = job.CreateIndex 4323 summary.ModifyIndex = index 4324 4325 if err := txn.Insert("job_summary", summary); err != nil { 4326 return fmt.Errorf("error inserting job summary: %v", err) 4327 } 4328 } 4329 4330 // Done with handling a parent job, continue to next 4331 continue 4332 } 4333 4334 // Create a job summary for the job 4335 summary := &structs.JobSummary{ 4336 JobID: job.ID, 4337 Namespace: job.Namespace, 4338 Summary: make(map[string]structs.TaskGroupSummary), 4339 } 4340 for _, tg := range job.TaskGroups { 4341 summary.Summary[tg.Name] = structs.TaskGroupSummary{} 4342 } 4343 4344 // Find all the allocations for the job 4345 iterAllocs, err := txn.Get("allocs", "job", job.Namespace, job.ID) 4346 if err != nil { 4347 return err 4348 } 4349 4350 // Calculate the summary for the job 4351 for { 4352 rawAlloc := iterAllocs.Next() 4353 if rawAlloc == nil { 4354 break 4355 } 4356 alloc := rawAlloc.(*structs.Allocation) 4357 4358 // Ignore the allocation if it doesn't belong to the currently 4359 // registered job.
The allocation is checked because of issue #2304 4360 if alloc.Job == nil || alloc.Job.CreateIndex != job.CreateIndex { 4361 continue 4362 } 4363 4364 tg := summary.Summary[alloc.TaskGroup] 4365 switch alloc.ClientStatus { 4366 case structs.AllocClientStatusFailed: 4367 tg.Failed += 1 4368 case structs.AllocClientStatusLost: 4369 tg.Lost += 1 4370 case structs.AllocClientStatusComplete: 4371 tg.Complete += 1 4372 case structs.AllocClientStatusRunning: 4373 tg.Running += 1 4374 case structs.AllocClientStatusPending: 4375 tg.Starting += 1 4376 default: 4377 s.logger.Error("invalid client status set on allocation", "client_status", alloc.ClientStatus, "alloc_id", alloc.ID) 4378 } 4379 summary.Summary[alloc.TaskGroup] = tg 4380 } 4381 4382 // Set the create index of the summary same as the job's create index 4383 // and the modify index to the current index 4384 summary.CreateIndex = job.CreateIndex 4385 summary.ModifyIndex = index 4386 4387 // Insert the job summary 4388 if err := txn.Insert("job_summary", summary); err != nil { 4389 return fmt.Errorf("error inserting job summary: %v", err) 4390 } 4391 } 4392 4393 // Update the indexes table for job summary 4394 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 4395 return fmt.Errorf("index update failed: %v", err) 4396 } 4397 return txn.Commit() 4398 } 4399 4400 // setJobStatuses is a helper for calling setJobStatus on multiple jobs by ID. 4401 // It takes a map of job IDs to an optional forceStatus string. Jobs that no 4402 // longer exist are skipped; it returns an error if setJobStatus fails. 4403 func (s *StateStore) setJobStatuses(index uint64, txn *txn, 4404 jobs map[structs.NamespacedID]string, evalDelete bool) error { 4405 for tuple, forceStatus := range jobs { 4406 4407 existing, err := txn.First("jobs", "id", tuple.Namespace, tuple.ID) 4408 if err != nil { 4409 return fmt.Errorf("job lookup failed: %v", err) 4410 } 4411 4412 if existing == nil { 4413 continue 4414 } 4415 4416 if err := s.setJobStatus(index, txn, existing.(*structs.Job), evalDelete, forceStatus); err != nil { 4417 return err 4418 } 4419 4420 } 4421 4422 return nil 4423 } 4424 4425 // setJobStatus sets the status of the job by looking up associated evaluations 4426 // and allocations. evalDelete should be set to true if setJobStatus is being 4427 // called because an evaluation is being deleted (potentially because of garbage 4428 // collection). If forceStatus is non-empty, the job's status will be set to the 4429 // passed status. 4430 func (s *StateStore) setJobStatus(index uint64, txn *txn, 4431 job *structs.Job, evalDelete bool, forceStatus string) error { 4432 4433 // Capture the current status so we can check if there is a change 4434 oldStatus := job.Status 4435 newStatus := forceStatus 4436 4437 // If forceStatus is not set, compute the job's status. 4438 if forceStatus == "" { 4439 var err error 4440 newStatus, err = s.getJobStatus(txn, job, evalDelete) 4441 if err != nil { 4442 return err 4443 } 4444 } 4445 4446 // Fast-path if the job has not changed.
4447 if oldStatus == newStatus { 4448 return nil 4449 } 4450 4451 // Copy and update the existing job 4452 updated := job.Copy() 4453 updated.Status = newStatus 4454 updated.ModifyIndex = index 4455 4456 // Insert the job 4457 if err := txn.Insert("jobs", updated); err != nil { 4458 return fmt.Errorf("job insert failed: %v", err) 4459 } 4460 if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil { 4461 return fmt.Errorf("index update failed: %v", err) 4462 } 4463 4464 // Update the children summary 4465 if err := s.setJobSummary(txn, updated, index, oldStatus, newStatus); err != nil { 4466 return fmt.Errorf("job summary update failed: %w", err) 4467 } 4468 return nil 4469 } 4470 4471 func (s *StateStore) setJobSummary(txn *txn, updated *structs.Job, index uint64, oldStatus, newStatus string) error { 4472 if updated.ParentID == "" { 4473 return nil 4474 } 4475 4476 // Try to update the summary of the parent job 4477 summaryRaw, err := txn.First("job_summary", "id", updated.Namespace, updated.ParentID) 4478 if err != nil { 4479 return fmt.Errorf("unable to retrieve summary for parent job: %v", err) 4480 } 4481 4482 // Only continue if the summary exists. It may not exist if the parent 4483 // job was removed 4484 if summaryRaw != nil { 4485 existing := summaryRaw.(*structs.JobSummary) 4486 pSummary := existing.Copy() 4487 if pSummary.Children == nil { 4488 pSummary.Children = new(structs.JobChildrenSummary) 4489 } 4490 4491 // Determine the transition and update the correct fields 4492 children := pSummary.Children 4493 4494 // Decrement old status 4495 if oldStatus != "" { 4496 switch oldStatus { 4497 case structs.JobStatusPending: 4498 children.Pending-- 4499 case structs.JobStatusRunning: 4500 children.Running-- 4501 case structs.JobStatusDead: 4502 children.Dead-- 4503 default: 4504 return fmt.Errorf("unknown old job status %q", oldStatus) 4505 } 4506 } 4507 4508 // Increment new status 4509 switch newStatus { 4510 case structs.JobStatusPending: 4511 children.Pending++ 4512 case structs.JobStatusRunning: 4513 children.Running++ 4514 case structs.JobStatusDead: 4515 children.Dead++ 4516 default: 4517 return fmt.Errorf("unknown new job status %q", newStatus) 4518 } 4519 4520 // Update the index 4521 pSummary.ModifyIndex = index 4522 4523 // Insert the summary 4524 if err := txn.Insert("job_summary", pSummary); err != nil { 4525 return fmt.Errorf("job summary insert failed: %v", err) 4526 } 4527 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 4528 return fmt.Errorf("index update failed: %v", err) 4529 } 4530 } 4531 return nil 4532 } 4533 4534 func (s *StateStore) getJobStatus(txn *txn, job *structs.Job, evalDelete bool) (string, error) { 4535 // System, Periodic and Parameterized jobs are running until explicitly 4536 // stopped 4537 if job.Type == structs.JobTypeSystem || job.IsParameterized() || job.IsPeriodic() { 4538 if job.Stop { 4539 return structs.JobStatusDead, nil 4540 } 4541 4542 return structs.JobStatusRunning, nil 4543 } 4544 4545 allocs, err := txn.Get("allocs", "job", job.Namespace, job.ID) 4546 if err != nil { 4547 return "", err 4548 } 4549 4550 // If there is a non-terminal allocation, the job is running.
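// The remainder of this function reduces to the following decision table
// (a sketch of the logic below, not an additional code path):
//
//	any non-terminal alloc                      -> running
//	else any non-terminal eval for this job     -> pending
//	else evalDelete || hasEval || hasAlloc      -> dead
//	otherwise                                   -> pending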
4551 hasAlloc := false 4552 for alloc := allocs.Next(); alloc != nil; alloc = allocs.Next() { 4553 hasAlloc = true 4554 if !alloc.(*structs.Allocation).TerminalStatus() { 4555 return structs.JobStatusRunning, nil 4556 } 4557 } 4558 4559 evals, err := txn.Get("evals", "job_prefix", job.Namespace, job.ID) 4560 if err != nil { 4561 return "", err 4562 } 4563 4564 hasEval := false 4565 for raw := evals.Next(); raw != nil; raw = evals.Next() { 4566 e := raw.(*structs.Evaluation) 4567 4568 // Filter non-exact matches 4569 if e.JobID != job.ID { 4570 continue 4571 } 4572 4573 hasEval = true 4574 if !e.TerminalStatus() { 4575 return structs.JobStatusPending, nil 4576 } 4577 } 4578 4579 // The job is dead if all the allocations and evals are terminal or if there 4580 // are no evals because of garbage collection. 4581 if evalDelete || hasEval || hasAlloc { 4582 return structs.JobStatusDead, nil 4583 } 4584 4585 return structs.JobStatusPending, nil 4586 } 4587 4588 // updateSummaryWithJob creates or updates job summaries when new jobs are 4589 // upserted or existing ones are updated 4590 func (s *StateStore) updateSummaryWithJob(index uint64, job *structs.Job, 4591 txn *txn) error { 4592 4593 // Update the job summary 4594 summaryRaw, err := txn.First("job_summary", "id", job.Namespace, job.ID) 4595 if err != nil { 4596 return fmt.Errorf("job summary lookup failed: %v", err) 4597 } 4598 4599 // Get the summary or create if necessary 4600 var summary *structs.JobSummary 4601 hasSummaryChanged := false 4602 if summaryRaw != nil { 4603 summary = summaryRaw.(*structs.JobSummary).Copy() 4604 } else { 4605 summary = &structs.JobSummary{ 4606 JobID: job.ID, 4607 Namespace: job.Namespace, 4608 Summary: make(map[string]structs.TaskGroupSummary), 4609 Children: new(structs.JobChildrenSummary), 4610 CreateIndex: index, 4611 } 4612 hasSummaryChanged = true 4613 } 4614 4615 for _, tg := range job.TaskGroups { 4616 if _, ok := summary.Summary[tg.Name]; !ok { 4617 newSummary := structs.TaskGroupSummary{ 4618 Complete: 0, 4619 Failed: 0, 4620 Running: 0, 4621 Starting: 0, 4622 } 4623 summary.Summary[tg.Name] = newSummary 4624 hasSummaryChanged = true 4625 } 4626 } 4627 4628 // The job summary has changed, so update the modify index. 
4629 if hasSummaryChanged { 4630 summary.ModifyIndex = index 4631 4632 // Update the indexes table for job summary 4633 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 4634 return fmt.Errorf("index update failed: %v", err) 4635 } 4636 if err := txn.Insert("job_summary", summary); err != nil { 4637 return err 4638 } 4639 } 4640 4641 return nil 4642 } 4643 4644 // updateJobScalingPolicies upserts any scaling policies contained in the job and deletes 4645 // any stored scaling policies that are no longer present in the job 4646 func (s *StateStore) updateJobScalingPolicies(index uint64, job *structs.Job, txn *txn) error { 4647 4648 ws := memdb.NewWatchSet() 4649 4650 scalingPolicies := job.GetScalingPolicies() 4651 newTargets := map[string]bool{} 4652 for _, p := range scalingPolicies { 4653 newTargets[p.JobKey()] = true 4654 } 4655 // find existing policies that need to be deleted 4656 deletedPolicies := []string{} 4657 iter, err := s.ScalingPoliciesByJobTxn(ws, job.Namespace, job.ID, txn) 4658 if err != nil { 4659 return fmt.Errorf("ScalingPoliciesByJob lookup failed: %v", err) 4660 } 4661 for raw := iter.Next(); raw != nil; raw = iter.Next() { 4662 oldPolicy := raw.(*structs.ScalingPolicy) 4663 if !newTargets[oldPolicy.JobKey()] { 4664 deletedPolicies = append(deletedPolicies, oldPolicy.ID) 4665 } 4666 } 4667 err = s.DeleteScalingPoliciesTxn(index, deletedPolicies, txn) 4668 if err != nil { 4669 return fmt.Errorf("DeleteScalingPolicies of removed policies failed: %v", err) 4670 } 4671 4672 err = s.UpsertScalingPoliciesTxn(index, scalingPolicies, txn) 4673 if err != nil { 4674 return fmt.Errorf("UpsertScalingPolicies of policies failed: %v", err) 4675 } 4676 4677 return nil 4678 } 4679 4680 // updateJobCSIPlugins runs on job update, and indexes the job in the plugin 4681 func (s *StateStore) updateJobCSIPlugins(index uint64, job, prev *structs.Job, txn *txn) error { 4682 plugIns := make(map[string]*structs.CSIPlugin) 4683 4684 loop := func(job *structs.Job, delete bool) error { 4685 for _, tg := range job.TaskGroups { 4686 for _, t := range tg.Tasks { 4687 if t.CSIPluginConfig == nil { 4688 continue 4689 } 4690 4691 plugIn, ok := plugIns[t.CSIPluginConfig.ID] 4692 if !ok { 4693 p, err := s.CSIPluginByIDTxn(txn, nil, t.CSIPluginConfig.ID) 4694 if err != nil { 4695 return err 4696 } 4697 if p == nil { 4698 plugIn = structs.NewCSIPlugin(t.CSIPluginConfig.ID, index) 4699 } else { 4700 plugIn = p.Copy() 4701 plugIn.ModifyIndex = index 4702 } 4703 plugIns[plugIn.ID] = plugIn 4704 } 4705 4706 if delete { 4707 plugIn.DeleteJob(job, nil) 4708 } else { 4709 plugIn.AddJob(job, nil) 4710 } 4711 } 4712 } 4713 4714 return nil 4715 } 4716 4717 if prev != nil { 4718 err := loop(prev, true) 4719 if err != nil { 4720 return err 4721 } 4722 } 4723 4724 err := loop(job, false) 4725 if err != nil { 4726 return err 4727 } 4728 4729 for _, plugIn := range plugIns { 4730 err = txn.Insert("csi_plugins", plugIn) 4731 if err != nil { 4732 return fmt.Errorf("csi_plugins insert error: %v", err) 4733 } 4734 } 4735 4736 if err := txn.Insert("index", &IndexEntry{"csi_plugins", index}); err != nil { 4737 return fmt.Errorf("index update failed: %v", err) 4738 } 4739 4740 return nil 4741 } 4742 4743 // updateDeploymentWithAlloc is used to update the deployment state associated 4744 // with the given allocation. The passed alloc may be updated if the deployment 4745 // status has changed to capture the modify index at which it has changed.
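//
// The health accounting in this function can be summarized as (a sketch of
// the transitions handled below, not a separate code path):
//
//	no existing alloc, or a different deployment ID  -> placed++
//	health reported for the first time               -> healthy++ or unhealthy++
//	previously healthy, now reported unhealthy       -> healthy--, unhealthy++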
4746 func (s *StateStore) updateDeploymentWithAlloc(index uint64, alloc, existing *structs.Allocation, txn *txn) error { 4747 // Nothing to do if the allocation is not associated with a deployment 4748 if alloc.DeploymentID == "" { 4749 return nil 4750 } 4751 4752 // Get the deployment 4753 ws := memdb.NewWatchSet() 4754 deployment, err := s.deploymentByIDImpl(ws, alloc.DeploymentID, txn) 4755 if err != nil { 4756 return err 4757 } 4758 if deployment == nil { 4759 return nil 4760 } 4761 4762 // Retrieve the deployment state object 4763 _, ok := deployment.TaskGroups[alloc.TaskGroup] 4764 if !ok { 4765 // If the task group isn't part of the deployment, the task group wasn't 4766 // part of a rolling update so nothing to do 4767 return nil 4768 } 4769 4770 // Do not modify in-place. Instead keep track of what must be done 4771 placed := 0 4772 healthy := 0 4773 unhealthy := 0 4774 4775 // If there was no existing allocation, this is a placement and we increment 4776 // the placement 4777 existingHealthSet := existing != nil && existing.DeploymentStatus.HasHealth() 4778 allocHealthSet := alloc.DeploymentStatus.HasHealth() 4779 if existing == nil || existing.DeploymentID != alloc.DeploymentID { 4780 placed++ 4781 } else if !existingHealthSet && allocHealthSet { 4782 if *alloc.DeploymentStatus.Healthy { 4783 healthy++ 4784 } else { 4785 unhealthy++ 4786 } 4787 } else if existingHealthSet && allocHealthSet { 4788 // See if it has gone from healthy to unhealthy 4789 if *existing.DeploymentStatus.Healthy && !*alloc.DeploymentStatus.Healthy { 4790 healthy-- 4791 unhealthy++ 4792 } 4793 } 4794 4795 // Nothing to do 4796 if placed == 0 && healthy == 0 && unhealthy == 0 { 4797 return nil 4798 } 4799 4800 // Update the allocation's deployment status modify index 4801 if alloc.DeploymentStatus != nil && healthy+unhealthy != 0 { 4802 alloc.DeploymentStatus.ModifyIndex = index 4803 } 4804 4805 // Create a copy of the deployment object 4806 deploymentCopy := deployment.Copy() 4807 deploymentCopy.ModifyIndex = index 4808 4809 dstate := deploymentCopy.TaskGroups[alloc.TaskGroup] 4810 dstate.PlacedAllocs += placed 4811 dstate.HealthyAllocs += healthy 4812 dstate.UnhealthyAllocs += unhealthy 4813 4814 // Ensure PlacedCanaries accurately reflects the alloc canary status 4815 if alloc.DeploymentStatus != nil && alloc.DeploymentStatus.Canary { 4816 found := false 4817 for _, canary := range dstate.PlacedCanaries { 4818 if alloc.ID == canary { 4819 found = true 4820 break 4821 } 4822 } 4823 if !found { 4824 dstate.PlacedCanaries = append(dstate.PlacedCanaries, alloc.ID) 4825 } 4826 } 4827 4828 // Update the progress deadline 4829 if pd := dstate.ProgressDeadline; pd != 0 { 4830 // If we are the first placed allocation for the deployment start the progress deadline. 4831 if placed != 0 && dstate.RequireProgressBy.IsZero() { 4832 // Use modify time instead of create time because we may in-place 4833 // update the allocation to be part of a new deployment. 
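// (For example, with a 10 minute ProgressDeadline, an allocation whose
// ModifyTime is t0 sets RequireProgressBy to t0+10m.)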
4834 dstate.RequireProgressBy = time.Unix(0, alloc.ModifyTime).Add(pd) 4835 } else if healthy != 0 { 4836 if d := alloc.DeploymentStatus.Timestamp.Add(pd); d.After(dstate.RequireProgressBy) { 4837 dstate.RequireProgressBy = d 4838 } 4839 } 4840 } 4841 4842 // Upsert the deployment 4843 if err := s.upsertDeploymentImpl(index, deploymentCopy, txn); err != nil { 4844 return err 4845 } 4846 4847 return nil 4848 } 4849 4850 // updateSummaryWithAlloc updates the job summary when allocations are updated 4851 // or inserted 4852 func (s *StateStore) updateSummaryWithAlloc(index uint64, alloc *structs.Allocation, 4853 existingAlloc *structs.Allocation, txn *txn) error { 4854 4855 // We don't have to update the summary if the job is missing 4856 if alloc.Job == nil { 4857 return nil 4858 } 4859 4860 summaryRaw, err := txn.First("job_summary", "id", alloc.Namespace, alloc.JobID) 4861 if err != nil { 4862 return fmt.Errorf("unable to lookup job summary for job id %q in namespace %q: %v", alloc.JobID, alloc.Namespace, err) 4863 } 4864 4865 if summaryRaw == nil { 4866 // Check if the job is de-registered 4867 rawJob, err := txn.First("jobs", "id", alloc.Namespace, alloc.JobID) 4868 if err != nil { 4869 return fmt.Errorf("unable to query job: %v", err) 4870 } 4871 4872 // If the job is de-registered then we skip updating its summary 4873 if rawJob == nil { 4874 return nil 4875 } 4876 4877 return fmt.Errorf("job summary for job %q in namespace %q is not present", alloc.JobID, alloc.Namespace) 4878 } 4879 4880 // Get a copy of the existing summary 4881 jobSummary := summaryRaw.(*structs.JobSummary).Copy() 4882 4883 // Not updating the job summary because the allocation doesn't belong to the 4884 // currently registered job 4885 if jobSummary.CreateIndex != alloc.Job.CreateIndex { 4886 return nil 4887 } 4888 4889 tgSummary, ok := jobSummary.Summary[alloc.TaskGroup] 4890 if !ok { 4891 return fmt.Errorf("unable to find task group in the job summary: %v", alloc.TaskGroup) 4892 } 4893 4894 summaryChanged := false 4895 if existingAlloc == nil { 4896 switch alloc.DesiredStatus { 4897 case structs.AllocDesiredStatusStop, structs.AllocDesiredStatusEvict: 4898 s.logger.Error("new allocation inserted into state store with bad desired status", 4899 "alloc_id", alloc.ID, "desired_status", alloc.DesiredStatus) 4900 } 4901 switch alloc.ClientStatus { 4902 case structs.AllocClientStatusPending: 4903 tgSummary.Starting += 1 4904 if tgSummary.Queued > 0 { 4905 tgSummary.Queued -= 1 4906 } 4907 summaryChanged = true 4908 case structs.AllocClientStatusRunning, structs.AllocClientStatusFailed, 4909 structs.AllocClientStatusComplete: 4910 s.logger.Error("new allocation inserted into state store with bad client status", 4911 "alloc_id", alloc.ID, "client_status", alloc.ClientStatus) 4912 } 4913 } else if existingAlloc.ClientStatus != alloc.ClientStatus { 4914 // Increment the count of the bin for the current state 4915 switch alloc.ClientStatus { 4916 case structs.AllocClientStatusRunning: 4917 tgSummary.Running += 1 4918 case structs.AllocClientStatusFailed: 4919 tgSummary.Failed += 1 4920 case structs.AllocClientStatusPending: 4921 tgSummary.Starting += 1 4922 case structs.AllocClientStatusComplete: 4923 tgSummary.Complete += 1 4924 case structs.AllocClientStatusLost: 4925 tgSummary.Lost += 1 4926 } 4927 4928 // Decrement the count of the bin for the last state 4929 switch existingAlloc.ClientStatus { 4930 case structs.AllocClientStatusRunning: 4931 if tgSummary.Running > 0 { 4932 tgSummary.Running -= 1 4933 } 4934 case
structs.AllocClientStatusPending: 4935 if tgSummary.Starting > 0 { 4936 tgSummary.Starting -= 1 4937 } 4938 case structs.AllocClientStatusLost: 4939 if tgSummary.Lost > 0 { 4940 tgSummary.Lost -= 1 4941 } 4942 case structs.AllocClientStatusFailed, structs.AllocClientStatusComplete: 4943 default: 4944 s.logger.Error("invalid old client status for allocation", 4945 "alloc_id", existingAlloc.ID, "client_status", existingAlloc.ClientStatus) 4946 } 4947 summaryChanged = true 4948 } 4949 jobSummary.Summary[alloc.TaskGroup] = tgSummary 4950 4951 if summaryChanged { 4952 jobSummary.ModifyIndex = index 4953 4954 s.updatePluginWithJobSummary(index, jobSummary, alloc, txn) 4955 4956 // Update the indexes table for job summary 4957 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 4958 return fmt.Errorf("index update failed: %v", err) 4959 } 4960 4961 if err := txn.Insert("job_summary", jobSummary); err != nil { 4962 return fmt.Errorf("updating job summary failed: %v", err) 4963 } 4964 } 4965 4966 return nil 4967 } 4968 4969 // updatePluginWithAlloc updates the CSI plugins for an alloc when the 4970 // allocation is updated or inserted with a terminal server status. 4971 func (s *StateStore) updatePluginWithAlloc(index uint64, alloc *structs.Allocation, 4972 txn *txn) error { 4973 if !alloc.ServerTerminalStatus() { 4974 return nil 4975 } 4976 4977 tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup) 4978 for _, t := range tg.Tasks { 4979 if t.CSIPluginConfig != nil { 4980 pluginID := t.CSIPluginConfig.ID 4981 plug, err := s.CSIPluginByIDTxn(txn, nil, pluginID) 4982 if err != nil { 4983 return err 4984 } 4985 if plug == nil { 4986 // plugin may not have been created because it never 4987 // became healthy, just move on 4988 return nil 4989 } 4990 plug = plug.Copy() 4991 err = plug.DeleteAlloc(alloc.ID, alloc.NodeID) 4992 if err != nil { 4993 return err 4994 } 4995 err = updateOrGCPlugin(index, txn, plug) 4996 if err != nil { 4997 return err 4998 } 4999 } 5000 } 5001 5002 return nil 5003 } 5004 5005 // updatePluginWithJobSummary updates the CSI plugins for a job when the 5006 // job summary is updated by an alloc 5007 func (s *StateStore) updatePluginWithJobSummary(index uint64, summary *structs.JobSummary, alloc *structs.Allocation, 5008 txn *txn) error { 5009 5010 tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup) 5011 if tg == nil { 5012 return nil 5013 } 5014 5015 for _, t := range tg.Tasks { 5016 if t.CSIPluginConfig != nil { 5017 pluginID := t.CSIPluginConfig.ID 5018 plug, err := s.CSIPluginByIDTxn(txn, nil, pluginID) 5019 if err != nil { 5020 return err 5021 } 5022 if plug == nil { 5023 plug = structs.NewCSIPlugin(pluginID, index) 5024 } else { 5025 plug = plug.Copy() 5026 } 5027 5028 plug.UpdateExpectedWithJob(alloc.Job, summary, alloc.ServerTerminalStatus()) 5029 err = updateOrGCPlugin(index, txn, plug) 5030 if err != nil { 5031 return err 5032 } 5033 } 5034 } 5035 5036 return nil 5037 } 5038 5039 // UpsertACLPolicies is used to create or update a set of ACL policies 5040 func (s *StateStore) UpsertACLPolicies(msgType structs.MessageType, index uint64, policies []*structs.ACLPolicy) error { 5041 txn := s.db.WriteTxnMsgT(msgType, index) 5042 defer txn.Abort() 5043 5044 for _, policy := range policies { 5045 // Ensure the policy hash is non-nil. This should be done outside the state store 5046 // for performance reasons, but we check here for defense in depth. 
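// (SetHash below computes the hash from the policy contents and stores it on
// the policy, so a zero-length hash is simply filled in here.)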
5047 if len(policy.Hash) == 0 { 5048 policy.SetHash() 5049 } 5050 5051 // Check if the policy already exists 5052 existing, err := txn.First("acl_policy", "id", policy.Name) 5053 if err != nil { 5054 return fmt.Errorf("policy lookup failed: %v", err) 5055 } 5056 5057 // Update all the indexes 5058 if existing != nil { 5059 policy.CreateIndex = existing.(*structs.ACLPolicy).CreateIndex 5060 policy.ModifyIndex = index 5061 } else { 5062 policy.CreateIndex = index 5063 policy.ModifyIndex = index 5064 } 5065 5066 // Update the policy 5067 if err := txn.Insert("acl_policy", policy); err != nil { 5068 return fmt.Errorf("upserting policy failed: %v", err) 5069 } 5070 } 5071 5072 // Update the indexes table 5073 if err := txn.Insert("index", &IndexEntry{"acl_policy", index}); err != nil { 5074 return fmt.Errorf("index update failed: %v", err) 5075 } 5076 5077 return txn.Commit() 5078 } 5079 5080 // DeleteACLPolicies deletes the policies with the given names 5081 func (s *StateStore) DeleteACLPolicies(msgType structs.MessageType, index uint64, names []string) error { 5082 txn := s.db.WriteTxnMsgT(msgType, index) 5083 defer txn.Abort() 5084 5085 // Delete the policies 5086 for _, name := range names { 5087 if _, err := txn.DeleteAll("acl_policy", "id", name); err != nil { 5088 return fmt.Errorf("deleting acl policy failed: %v", err) 5089 } 5090 } 5091 if err := txn.Insert("index", &IndexEntry{"acl_policy", index}); err != nil { 5092 return fmt.Errorf("index update failed: %v", err) 5093 } 5094 return txn.Commit() 5095 } 5096 5097 // ACLPolicyByName is used to lookup a policy by name 5098 func (s *StateStore) ACLPolicyByName(ws memdb.WatchSet, name string) (*structs.ACLPolicy, error) { 5099 txn := s.db.ReadTxn() 5100 5101 watchCh, existing, err := txn.FirstWatch("acl_policy", "id", name) 5102 if err != nil { 5103 return nil, fmt.Errorf("acl policy lookup failed: %v", err) 5104 } 5105 ws.Add(watchCh) 5106 5107 if existing != nil { 5108 return existing.(*structs.ACLPolicy), nil 5109 } 5110 return nil, nil 5111 } 5112 5113 // ACLPolicyByNamePrefix is used to lookup policies by prefix 5114 func (s *StateStore) ACLPolicyByNamePrefix(ws memdb.WatchSet, prefix string) (memdb.ResultIterator, error) { 5115 txn := s.db.ReadTxn() 5116 5117 iter, err := txn.Get("acl_policy", "id_prefix", prefix) 5118 if err != nil { 5119 return nil, fmt.Errorf("acl policy lookup failed: %v", err) 5120 } 5121 ws.Add(iter.WatchCh()) 5122 5123 return iter, nil 5124 } 5125 5126 // ACLPolicies returns an iterator over all the acl policies 5127 func (s *StateStore) ACLPolicies(ws memdb.WatchSet) (memdb.ResultIterator, error) { 5128 txn := s.db.ReadTxn() 5129 5130 // Walk the entire table 5131 iter, err := txn.Get("acl_policy", "id") 5132 if err != nil { 5133 return nil, err 5134 } 5135 ws.Add(iter.WatchCh()) 5136 return iter, nil 5137 } 5138 5139 // UpsertACLTokens is used to create or update a set of ACL tokens 5140 func (s *StateStore) UpsertACLTokens(msgType structs.MessageType, index uint64, tokens []*structs.ACLToken) error { 5141 txn := s.db.WriteTxnMsgT(msgType, index) 5142 defer txn.Abort() 5143 5144 for _, token := range tokens { 5145 // Ensure the token hash is non-nil. This should be done outside the state store 5146 // for performance reasons, but we check here for defense in depth.
5147 if len(token.Hash) == 0 { 5148 token.SetHash() 5149 } 5150 5151 // Check if the token already exists 5152 existing, err := txn.First("acl_token", "id", token.AccessorID) 5153 if err != nil { 5154 return fmt.Errorf("token lookup failed: %v", err) 5155 } 5156 5157 // Update all the indexes 5158 if existing != nil { 5159 existTK := existing.(*structs.ACLToken) 5160 token.CreateIndex = existTK.CreateIndex 5161 token.ModifyIndex = index 5162 5163 // Do not allow SecretID or create time to change 5164 token.SecretID = existTK.SecretID 5165 token.CreateTime = existTK.CreateTime 5166 5167 } else { 5168 token.CreateIndex = index 5169 token.ModifyIndex = index 5170 } 5171 5172 // Update the token 5173 if err := txn.Insert("acl_token", token); err != nil { 5174 return fmt.Errorf("upserting token failed: %v", err) 5175 } 5176 } 5177 5178 // Update the indexes table 5179 if err := txn.Insert("index", &IndexEntry{"acl_token", index}); err != nil { 5180 return fmt.Errorf("index update failed: %v", err) 5181 } 5182 return txn.Commit() 5183 } 5184 5185 // DeleteACLTokens deletes the tokens with the given accessor ids 5186 func (s *StateStore) DeleteACLTokens(msgType structs.MessageType, index uint64, ids []string) error { 5187 txn := s.db.WriteTxnMsgT(msgType, index) 5188 defer txn.Abort() 5189 5190 // Delete the tokens 5191 for _, id := range ids { 5192 if _, err := txn.DeleteAll("acl_token", "id", id); err != nil { 5193 return fmt.Errorf("deleting acl token failed: %v", err) 5194 } 5195 } 5196 if err := txn.Insert("index", &IndexEntry{"acl_token", index}); err != nil { 5197 return fmt.Errorf("index update failed: %v", err) 5198 } 5199 return txn.Commit() 5200 } 5201 5202 // ACLTokenByAccessorID is used to lookup a token by accessor ID 5203 func (s *StateStore) ACLTokenByAccessorID(ws memdb.WatchSet, id string) (*structs.ACLToken, error) { 5204 if id == "" { 5205 return nil, fmt.Errorf("acl token lookup failed: missing accessor id") 5206 } 5207 5208 txn := s.db.ReadTxn() 5209 5210 watchCh, existing, err := txn.FirstWatch("acl_token", "id", id) 5211 if err != nil { 5212 return nil, fmt.Errorf("acl token lookup failed: %v", err) 5213 } 5214 ws.Add(watchCh) 5215 5216 if existing != nil { 5217 return existing.(*structs.ACLToken), nil 5218 } 5219 return nil, nil 5220 } 5221 5222 // ACLTokenBySecretID is used to lookup a token by secret ID 5223 func (s *StateStore) ACLTokenBySecretID(ws memdb.WatchSet, secretID string) (*structs.ACLToken, error) { 5224 if secretID == "" { 5225 return nil, fmt.Errorf("acl token lookup failed: missing secret id") 5226 } 5227 5228 txn := s.db.ReadTxn() 5229 5230 watchCh, existing, err := txn.FirstWatch("acl_token", "secret", secretID) 5231 if err != nil { 5232 return nil, fmt.Errorf("acl token lookup failed: %v", err) 5233 } 5234 ws.Add(watchCh) 5235 5236 if existing != nil { 5237 return existing.(*structs.ACLToken), nil 5238 } 5239 return nil, nil 5240 } 5241 5242 // ACLTokenByAccessorIDPrefix is used to lookup tokens by prefix 5243 func (s *StateStore) ACLTokenByAccessorIDPrefix(ws memdb.WatchSet, prefix string) (memdb.ResultIterator, error) { 5244 txn := s.db.ReadTxn() 5245 5246 iter, err := txn.Get("acl_token", "id_prefix", prefix) 5247 if err != nil { 5248 return nil, fmt.Errorf("acl token lookup failed: %v", err) 5249 } 5250 ws.Add(iter.WatchCh()) 5251 return iter, nil 5252 } 5253 5254 // ACLTokens returns an iterator over all the tokens 5255 func (s *StateStore) ACLTokens(ws memdb.WatchSet) (memdb.ResultIterator, error) { 5256 txn := s.db.ReadTxn() 5257 5258 // Walk 
the entire table 5259 iter, err := txn.Get("acl_token", "id") 5260 if err != nil { 5261 return nil, err 5262 } 5263 ws.Add(iter.WatchCh()) 5264 return iter, nil 5265 } 5266 5267 // ACLTokensByGlobal returns an iterator over all the tokens filtered by global value 5268 func (s *StateStore) ACLTokensByGlobal(ws memdb.WatchSet, globalVal bool) (memdb.ResultIterator, error) { 5269 txn := s.db.ReadTxn() 5270 5271 // Walk the entire table 5272 iter, err := txn.Get("acl_token", "global", globalVal) 5273 if err != nil { 5274 return nil, err 5275 } 5276 ws.Add(iter.WatchCh()) 5277 return iter, nil 5278 } 5279 5280 // CanBootstrapACLToken checks if bootstrapping is possible and returns the reset index 5281 func (s *StateStore) CanBootstrapACLToken() (bool, uint64, error) { 5282 txn := s.db.ReadTxn() 5283 5284 // Lookup the bootstrap sentinel 5285 out, err := txn.First("index", "id", "acl_token_bootstrap") 5286 if err != nil { 5287 return false, 0, err 5288 } 5289 5290 // No entry, we haven't bootstrapped yet 5291 if out == nil { 5292 return true, 0, nil 5293 } 5294 5295 // Return the reset index if we've already bootstrapped 5296 return false, out.(*IndexEntry).Value, nil 5297 } 5298 5299 // BootstrapACLTokens is used to create an initial ACL token 5300 func (s *StateStore) BootstrapACLTokens(msgType structs.MessageType, index uint64, resetIndex uint64, token *structs.ACLToken) error { 5301 txn := s.db.WriteTxnMsgT(msgType, index) 5302 defer txn.Abort() 5303 5304 // Check if we have already done a bootstrap 5305 existing, err := txn.First("index", "id", "acl_token_bootstrap") 5306 if err != nil { 5307 return fmt.Errorf("bootstrap check failed: %v", err) 5308 } 5309 if existing != nil { 5310 if resetIndex == 0 { 5311 return fmt.Errorf("ACL bootstrap already done") 5312 } else if resetIndex != existing.(*IndexEntry).Value { 5313 return fmt.Errorf("Invalid reset index for ACL bootstrap") 5314 } 5315 } 5316 5317 // Update the create/modify indexes 5318 token.CreateIndex = index 5319 token.ModifyIndex = index 5320 5321 // Insert the token 5322 if err := txn.Insert("acl_token", token); err != nil { 5323 return fmt.Errorf("upserting token failed: %v", err) 5324 } 5325 5326 // Update the indexes table, prevents future bootstrap until reset 5327 if err := txn.Insert("index", &IndexEntry{"acl_token", index}); err != nil { 5328 return fmt.Errorf("index update failed: %v", err) 5329 } 5330 if err := txn.Insert("index", &IndexEntry{"acl_token_bootstrap", index}); err != nil { 5331 return fmt.Errorf("index update failed: %v", err) 5332 } 5333 return txn.Commit() 5334 } 5335 5336 // SchedulerConfig is used to get the current Scheduler configuration. 5337 func (s *StateStore) SchedulerConfig() (uint64, *structs.SchedulerConfiguration, error) { 5338 tx := s.db.ReadTxn() 5339 defer tx.Abort() 5340 5341 // Get the scheduler config 5342 c, err := tx.First("scheduler_config", "id") 5343 if err != nil { 5344 return 0, nil, fmt.Errorf("failed scheduler config lookup: %s", err) 5345 } 5346 5347 config, ok := c.(*structs.SchedulerConfiguration) 5348 if !ok { 5349 return 0, nil, nil 5350 } 5351 5352 return config.ModifyIndex, config, nil 5353 } 5354 5355 // SchedulerSetConfig is used to set the current Scheduler configuration.
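//
// A hypothetical caller (store, raftIndex, and the chosen fields are
// invented): enabling preemption for system jobs at a new Raft index:
//
//	cfg := &structs.SchedulerConfiguration{
//		PreemptionConfig: structs.PreemptionConfig{SystemSchedulerEnabled: true},
//	}
//	if err := store.SchedulerSetConfig(raftIndex, cfg); err != nil {
//		return err
//	}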
5356 func (s *StateStore) SchedulerSetConfig(index uint64, config *structs.SchedulerConfiguration) error { 5357 tx := s.db.WriteTxn(index) 5358 defer tx.Abort() 5359 5360 if err := s.schedulerSetConfigTxn(index, tx, config); err != nil { return err } 5361 5362 return tx.Commit() 5363 } 5364 5365 func (s *StateStore) ClusterMetadata(ws memdb.WatchSet) (*structs.ClusterMetadata, error) { 5366 txn := s.db.ReadTxn() 5367 defer txn.Abort() 5368 5369 // Get the cluster metadata 5370 watchCh, m, err := txn.FirstWatch("cluster_meta", "id") 5371 if err != nil { 5372 return nil, errors.Wrap(err, "failed cluster metadata lookup") 5373 } 5374 ws.Add(watchCh) 5375 5376 if m != nil { 5377 return m.(*structs.ClusterMetadata), nil 5378 } 5379 5380 return nil, nil 5381 } 5382 5383 func (s *StateStore) ClusterSetMetadata(index uint64, meta *structs.ClusterMetadata) error { 5384 txn := s.db.WriteTxn(index) 5385 defer txn.Abort() 5386 5387 if err := s.setClusterMetadata(txn, meta); err != nil { 5388 return errors.Wrap(err, "set cluster metadata failed") 5389 } 5390 5391 return txn.Commit() 5392 } 5393 5394 // WithWriteTransaction executes the passed function within a write transaction, 5395 // and returns its result. If the invocation returns no error, the transaction 5396 // is committed; otherwise, it's aborted. 5397 func (s *StateStore) WithWriteTransaction(msgType structs.MessageType, index uint64, fn func(Txn) error) error { 5398 tx := s.db.WriteTxnMsgT(msgType, index) 5399 defer tx.Abort() 5400 5401 err := fn(tx) 5402 if err == nil { 5403 return tx.Commit() 5404 } 5405 return err 5406 } 5407 5408 // SchedulerCASConfig is used to update the scheduler configuration with a 5409 // given Raft index. If the CAS index specified is not equal to the last observed index 5410 // for the config, then the call is a noop. 5411 func (s *StateStore) SchedulerCASConfig(index, cidx uint64, config *structs.SchedulerConfiguration) (bool, error) { 5412 tx := s.db.WriteTxn(index) 5413 defer tx.Abort() 5414 5415 // Check for an existing config 5416 existing, err := tx.First("scheduler_config", "id") 5417 if err != nil { 5418 return false, fmt.Errorf("failed scheduler config lookup: %s", err) 5419 } 5420 5421 // If the existing index does not match the provided CAS 5422 // index arg, then we shouldn't update anything and can safely 5423 // return early here. 5424 e, ok := existing.(*structs.SchedulerConfiguration) 5425 if !ok || (e != nil && e.ModifyIndex != cidx) { 5426 return false, nil 5427 } 5428 5429 if err := s.schedulerSetConfigTxn(index, tx, config); err != nil { return false, err } 5430 5431 if err := tx.Commit(); err != nil { 5432 return false, err 5433 } 5434 return true, nil 5435 } 5436 5437 func (s *StateStore) schedulerSetConfigTxn(idx uint64, tx *txn, config *structs.SchedulerConfiguration) error { 5438 // Check for an existing config 5439 existing, err := tx.First("scheduler_config", "id") 5440 if err != nil { 5441 return fmt.Errorf("failed scheduler config lookup: %s", err) 5442 } 5443 5444 // Set the indexes.
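// (CreateIndex is carried over from any existing config so it keeps the index
// at which the config was first written; ModifyIndex always advances to the
// index of this write.)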

func (s *StateStore) schedulerSetConfigTxn(idx uint64, tx *txn, config *structs.SchedulerConfiguration) error {
	// Check for an existing config
	existing, err := tx.First("scheduler_config", "id")
	if err != nil {
		return fmt.Errorf("failed scheduler config lookup: %s", err)
	}

	// Set the indexes.
	if existing != nil {
		config.CreateIndex = existing.(*structs.SchedulerConfiguration).CreateIndex
	} else {
		config.CreateIndex = idx
	}
	config.ModifyIndex = idx

	if err := tx.Insert("scheduler_config", config); err != nil {
		return fmt.Errorf("failed updating scheduler config: %s", err)
	}
	return nil
}

func (s *StateStore) setClusterMetadata(txn *txn, meta *structs.ClusterMetadata) error {
	// Check for an existing config, if it exists, sanity check the cluster ID matches
	existing, err := txn.First("cluster_meta", "id")
	if err != nil {
		return fmt.Errorf("failed cluster meta lookup: %v", err)
	}

	if existing != nil {
		existingClusterID := existing.(*structs.ClusterMetadata).ClusterID
		if meta.ClusterID != existingClusterID && existingClusterID != "" {
			// there is a bug in cluster ID detection
			return fmt.Errorf("refusing to set new cluster id, previous: %s, new: %s", existingClusterID, meta.ClusterID)
		}
	}

	// update is technically a noop, unless someday we add more / mutable fields
	if err := txn.Insert("cluster_meta", meta); err != nil {
		return fmt.Errorf("set cluster metadata failed: %v", err)
	}

	return nil
}

// UpsertScalingPolicies is used to insert new scaling policies.
func (s *StateStore) UpsertScalingPolicies(index uint64, scalingPolicies []*structs.ScalingPolicy) error {
	txn := s.db.WriteTxn(index)
	defer txn.Abort()

	if err := s.UpsertScalingPoliciesTxn(index, scalingPolicies, txn); err != nil {
		return err
	}

	return txn.Commit()
}

// UpsertScalingPoliciesTxn is used to insert new scaling policies within an
// existing transaction.
func (s *StateStore) UpsertScalingPoliciesTxn(index uint64, scalingPolicies []*structs.ScalingPolicy,
	txn *txn) error {

	hadUpdates := false

	for _, policy := range scalingPolicies {
		// Check if the scaling policy already exists
		// Policy uniqueness is based on target and type
		it, err := txn.Get("scaling_policy", "target",
			policy.Target[structs.ScalingTargetNamespace],
			policy.Target[structs.ScalingTargetJob],
			policy.Target[structs.ScalingTargetGroup],
			policy.Target[structs.ScalingTargetTask],
		)
		if err != nil {
			return fmt.Errorf("scaling policy lookup failed: %v", err)
		}

		// Check if type matches
		var existing *structs.ScalingPolicy
		for raw := it.Next(); raw != nil; raw = it.Next() {
			p := raw.(*structs.ScalingPolicy)
			if p.Type == policy.Type {
				existing = p
				break
			}
		}

		// Setup the indexes correctly
		if existing != nil {
			if !existing.Diff(policy) {
				continue
			}
			policy.ID = existing.ID
			policy.CreateIndex = existing.CreateIndex
		} else {
			// policy.ID must have been set already in Job.Register before log apply
			policy.CreateIndex = index
		}
		policy.ModifyIndex = index

		// Insert the scaling policy
		hadUpdates = true
		if err := txn.Insert("scaling_policy", policy); err != nil {
			return err
		}
	}

	// Update the indexes table for scaling policy if we updated any policies
	if hadUpdates {
		if err := txn.Insert("index", &IndexEntry{"scaling_policy", index}); err != nil {
			return fmt.Errorf("index update failed: %v", err)
		}
	}

	return nil
}
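
// exampleUpsertPoliciesTxn is an illustrative sketch, not part of the
// upstream source: the Txn-suffixed variant above exists so scaling policy
// writes can compose with other writes in one transaction, and
// WithWriteTransaction supplies the commit/abort bookkeeping. The concrete
// msgType constant is left to the caller.
func exampleUpsertPoliciesTxn(s *StateStore, msgType structs.MessageType, index uint64, policies []*structs.ScalingPolicy) error {
	return s.WithWriteTransaction(msgType, index, func(tx Txn) error {
		// Additional writes could share this same transaction.
		return s.UpsertScalingPoliciesTxn(index, policies, tx)
	})
}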

// NamespaceByName is used to lookup a namespace by name
func (s *StateStore) NamespaceByName(ws memdb.WatchSet, name string) (*structs.Namespace, error) {
	txn := s.db.ReadTxn()
	return s.namespaceByNameImpl(ws, txn, name)
}

// namespaceByNameImpl is used to lookup a namespace by name
func (s *StateStore) namespaceByNameImpl(ws memdb.WatchSet, txn *txn, name string) (*structs.Namespace, error) {
	watchCh, existing, err := txn.FirstWatch(TableNamespaces, "id", name)
	if err != nil {
		return nil, fmt.Errorf("namespace lookup failed: %v", err)
	}
	ws.Add(watchCh)

	if existing != nil {
		return existing.(*structs.Namespace), nil
	}
	return nil, nil
}

// namespaceExists returns whether a namespace exists
func (s *StateStore) namespaceExists(txn *txn, namespace string) (bool, error) {
	if namespace == structs.DefaultNamespace {
		return true, nil
	}

	existing, err := txn.First(TableNamespaces, "id", namespace)
	if err != nil {
		return false, fmt.Errorf("namespace lookup failed: %v", err)
	}

	return existing != nil, nil
}

// NamespacesByNamePrefix is used to lookup namespaces by prefix
func (s *StateStore) NamespacesByNamePrefix(ws memdb.WatchSet, namePrefix string) (memdb.ResultIterator, error) {
	txn := s.db.ReadTxn()

	iter, err := txn.Get(TableNamespaces, "id_prefix", namePrefix)
	if err != nil {
		return nil, fmt.Errorf("namespaces lookup failed: %v", err)
	}
	ws.Add(iter.WatchCh())

	return iter, nil
}

// Namespaces returns an iterator over all the namespaces
func (s *StateStore) Namespaces(ws memdb.WatchSet) (memdb.ResultIterator, error) {
	txn := s.db.ReadTxn()

	// Walk the entire namespace table
	iter, err := txn.Get(TableNamespaces, "id")
	if err != nil {
		return nil, err
	}
	ws.Add(iter.WatchCh())
	return iter, nil
}

func (s *StateStore) NamespaceNames() ([]string, error) {
	it, err := s.Namespaces(nil)
	if err != nil {
		return nil, err
	}

	nses := []string{}
	for {
		next := it.Next()
		if next == nil {
			break
		}
		ns := next.(*structs.Namespace)
		nses = append(nses, ns.Name)
	}

	return nses, nil
}

// UpsertNamespaces is used to register or update a set of namespaces
func (s *StateStore) UpsertNamespaces(index uint64, namespaces []*structs.Namespace) error {
	txn := s.db.WriteTxn(index)
	defer txn.Abort()

	for _, ns := range namespaces {
		if err := s.upsertNamespaceImpl(index, txn, ns); err != nil {
			return err
		}
	}

	if err := txn.Insert("index", &IndexEntry{TableNamespaces, index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}

	return txn.Commit()
}
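
// exampleWatchNamespace is an illustrative sketch, not part of the upstream
// source: it shows the blocking-query pattern the WatchSet parameters above
// support. The watch set fires when the namespace entry (or the whole state
// store, via the abandon channel) changes; WatchCtx is the go-memdb
// primitive for waiting on it.
func exampleWatchNamespace(ctx context.Context, s *StateStore, name string) (*structs.Namespace, error) {
	for {
		ws := s.NewWatchSet()
		ns, err := s.NamespaceByName(ws, name)
		if err != nil {
			return nil, err
		}
		if ns != nil {
			return ns, nil
		}
		// Block until the watched radix nodes change or ctx is cancelled.
		if err := ws.WatchCtx(ctx); err != nil {
			return nil, err
		}
	}
}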

// upsertNamespaceImpl is used to upsert a namespace
func (s *StateStore) upsertNamespaceImpl(index uint64, txn *txn, namespace *structs.Namespace) error {
	// Ensure the namespace hash is non-nil. This should be done outside the state store
	// for performance reasons, but we check here for defense in depth.
	ns := namespace
	if len(ns.Hash) == 0 {
		ns.SetHash()
	}

	// Check if the namespace already exists
	existing, err := txn.First(TableNamespaces, "id", ns.Name)
	if err != nil {
		return fmt.Errorf("namespace lookup failed: %v", err)
	}

	// Setup the indexes correctly and determine which quotas need to be
	// reconciled
	var oldQuota string
	if existing != nil {
		exist := existing.(*structs.Namespace)
		ns.CreateIndex = exist.CreateIndex
		ns.ModifyIndex = index

		// Grab the old quota on the namespace
		oldQuota = exist.Quota
	} else {
		ns.CreateIndex = index
		ns.ModifyIndex = index
	}

	// Validate that the quota on the new namespace exists
	if ns.Quota != "" {
		exists, err := s.quotaSpecExists(txn, ns.Quota)
		if err != nil {
			return fmt.Errorf("looking up namespace quota %q failed: %v", ns.Quota, err)
		} else if !exists {
			return fmt.Errorf("namespace %q using non-existent quota %q", ns.Name, ns.Quota)
		}
	}

	// Insert the namespace
	if err := txn.Insert(TableNamespaces, ns); err != nil {
		return fmt.Errorf("namespace insert failed: %v", err)
	}

	// Reconcile changed quotas
	return s.quotaReconcile(index, txn, ns.Quota, oldQuota)
}
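
// exampleRegisterNamespace is an illustrative sketch, not part of the
// upstream source: callers are expected to compute the namespace hash before
// the Raft log is applied, and the upsert above only recomputes it
// defensively. The field values here are assumptions.
func exampleRegisterNamespace(s *StateStore, index uint64, name, description string) error {
	ns := &structs.Namespace{
		Name:        name,
		Description: description,
	}
	// Normally done outside the state store, before log apply.
	ns.SetHash()
	return s.UpsertNamespaces(index, []*structs.Namespace{ns})
}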
"+ 5733 "All jobs must be terminal in namespace before it can be deleted", name, job.ID) 5734 } 5735 } 5736 5737 // Delete the namespace 5738 if err := txn.Delete(TableNamespaces, existing); err != nil { 5739 return fmt.Errorf("namespace deletion failed: %v", err) 5740 } 5741 } 5742 5743 if err := txn.Insert("index", &IndexEntry{TableNamespaces, index}); err != nil { 5744 return fmt.Errorf("index update failed: %v", err) 5745 } 5746 5747 return txn.Commit() 5748 } 5749 5750 func (s *StateStore) DeleteScalingPolicies(index uint64, ids []string) error { 5751 txn := s.db.WriteTxn(index) 5752 defer txn.Abort() 5753 5754 err := s.DeleteScalingPoliciesTxn(index, ids, txn) 5755 if err == nil { 5756 return txn.Commit() 5757 } 5758 5759 return err 5760 } 5761 5762 // DeleteScalingPolicies is used to delete a set of scaling policies by ID 5763 func (s *StateStore) DeleteScalingPoliciesTxn(index uint64, ids []string, txn *txn) error { 5764 if len(ids) == 0 { 5765 return nil 5766 } 5767 5768 for _, id := range ids { 5769 // Lookup the scaling policy 5770 existing, err := txn.First("scaling_policy", "id", id) 5771 if err != nil { 5772 return fmt.Errorf("scaling policy lookup failed: %v", err) 5773 } 5774 if existing == nil { 5775 return fmt.Errorf("scaling policy not found") 5776 } 5777 5778 // Delete the scaling policy 5779 if err := txn.Delete("scaling_policy", existing); err != nil { 5780 return fmt.Errorf("scaling policy delete failed: %v", err) 5781 } 5782 } 5783 5784 if err := txn.Insert("index", &IndexEntry{"scaling_policy", index}); err != nil { 5785 return fmt.Errorf("index update failed: %v", err) 5786 } 5787 5788 return nil 5789 } 5790 5791 // ScalingPolicies returns an iterator over all the scaling policies 5792 func (s *StateStore) ScalingPolicies(ws memdb.WatchSet) (memdb.ResultIterator, error) { 5793 txn := s.db.ReadTxn() 5794 5795 // Walk the entire scaling_policy table 5796 iter, err := txn.Get("scaling_policy", "id") 5797 if err != nil { 5798 return nil, err 5799 } 5800 5801 ws.Add(iter.WatchCh()) 5802 5803 return iter, nil 5804 } 5805 5806 // ScalingPoliciesByTypePrefix returns an iterator over scaling policies with a certain type prefix. 

// ScalingPoliciesByTypePrefix returns an iterator over scaling policies with a certain type prefix.
func (s *StateStore) ScalingPoliciesByTypePrefix(ws memdb.WatchSet, t string) (memdb.ResultIterator, error) {
	txn := s.db.ReadTxn()

	iter, err := txn.Get("scaling_policy", "type_prefix", t)
	if err != nil {
		return nil, err
	}

	ws.Add(iter.WatchCh())
	return iter, nil
}

func (s *StateStore) ScalingPoliciesByNamespace(ws memdb.WatchSet, namespace, typ string) (memdb.ResultIterator, error) {
	txn := s.db.ReadTxn()

	iter, err := txn.Get("scaling_policy", "target_prefix", namespace)
	if err != nil {
		return nil, err
	}

	ws.Add(iter.WatchCh())

	// Wrap the iterator in a filter to exactly match the namespace
	iter = memdb.NewFilterIterator(iter, scalingPolicyNamespaceFilter(namespace))

	// If policy type is specified as well, wrap again
	if typ != "" {
		iter = memdb.NewFilterIterator(iter, func(raw interface{}) bool {
			p, ok := raw.(*structs.ScalingPolicy)
			if !ok {
				return true
			}
			return !strings.HasPrefix(p.Type, typ)
		})
	}

	return iter, nil
}

func (s *StateStore) ScalingPoliciesByJob(ws memdb.WatchSet, namespace, jobID, policyType string) (memdb.ResultIterator,
	error) {
	txn := s.db.ReadTxn()
	iter, err := s.ScalingPoliciesByJobTxn(ws, namespace, jobID, txn)
	if err != nil {
		return nil, err
	}

	if policyType == "" {
		return iter, nil
	}

	filter := func(raw interface{}) bool {
		p, ok := raw.(*structs.ScalingPolicy)
		if !ok {
			return true
		}
		return policyType != p.Type
	}

	return memdb.NewFilterIterator(iter, filter), nil
}

func (s *StateStore) ScalingPoliciesByJobTxn(ws memdb.WatchSet, namespace, jobID string,
	txn *txn) (memdb.ResultIterator, error) {

	iter, err := txn.Get("scaling_policy", "target_prefix", namespace, jobID)
	if err != nil {
		return nil, err
	}

	ws.Add(iter.WatchCh())

	filter := func(raw interface{}) bool {
		d, ok := raw.(*structs.ScalingPolicy)
		if !ok {
			return true
		}

		return d.Target[structs.ScalingTargetJob] != jobID
	}

	// Wrap the iterator in a filter
	wrap := memdb.NewFilterIterator(iter, filter)
	return wrap, nil
}

func (s *StateStore) ScalingPolicyByID(ws memdb.WatchSet, id string) (*structs.ScalingPolicy, error) {
	txn := s.db.ReadTxn()

	watchCh, existing, err := txn.FirstWatch("scaling_policy", "id", id)
	if err != nil {
		return nil, fmt.Errorf("scaling_policy lookup failed: %v", err)
	}
	ws.Add(watchCh)

	if existing != nil {
		return existing.(*structs.ScalingPolicy), nil
	}

	return nil, nil
}
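
// exampleMustScalingPolicy is an illustrative sketch, not part of the
// upstream source: single-object lookups above return (nil, nil) when no
// record exists, so callers must treat nil as "not found" rather than
// expecting an error. A nil WatchSet is safe to pass; go-memdb's
// WatchSet.Add is a no-op on a nil set.
func exampleMustScalingPolicy(s *StateStore, id string) (*structs.ScalingPolicy, error) {
	p, err := s.ScalingPolicyByID(nil, id)
	if err != nil {
		return nil, err
	}
	if p == nil {
		return nil, fmt.Errorf("scaling policy %q not found", id)
	}
	return p, nil
}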

// ScalingPolicyByTargetAndType returns a fully-qualified policy against a target and policy type,
// or nil if it does not exist. This method does not honor the watchset on the policy type, just the target.
func (s *StateStore) ScalingPolicyByTargetAndType(ws memdb.WatchSet, target map[string]string, typ string) (*structs.ScalingPolicy,
	error) {
	txn := s.db.ReadTxn()

	namespace := target[structs.ScalingTargetNamespace]
	job := target[structs.ScalingTargetJob]
	group := target[structs.ScalingTargetGroup]
	task := target[structs.ScalingTargetTask]

	it, err := txn.Get("scaling_policy", "target", namespace, job, group, task)
	if err != nil {
		return nil, fmt.Errorf("scaling_policy lookup failed: %v", err)
	}

	ws.Add(it.WatchCh())

	// Check for type
	var existing *structs.ScalingPolicy
	for raw := it.Next(); raw != nil; raw = it.Next() {
		p := raw.(*structs.ScalingPolicy)
		if p.Type == typ {
			existing = p
			break
		}
	}

	if existing != nil {
		return existing, nil
	}

	return nil, nil
}

func (s *StateStore) ScalingPoliciesByIDPrefix(ws memdb.WatchSet, namespace string, prefix string) (memdb.ResultIterator, error) {
	txn := s.db.ReadTxn()

	iter, err := txn.Get("scaling_policy", "id_prefix", prefix)
	if err != nil {
		return nil, fmt.Errorf("scaling policy lookup failed: %v", err)
	}

	ws.Add(iter.WatchCh())

	iter = memdb.NewFilterIterator(iter, scalingPolicyNamespaceFilter(namespace))

	return iter, nil
}

// scalingPolicyNamespaceFilter returns a filter function that filters all
// scaling policies not targeting the given namespace.
func scalingPolicyNamespaceFilter(namespace string) func(interface{}) bool {
	return func(raw interface{}) bool {
		p, ok := raw.(*structs.ScalingPolicy)
		if !ok {
			return true
		}

		return p.Target[structs.ScalingTargetNamespace] != namespace
	}
}

// StateSnapshot is used to provide a point-in-time snapshot
type StateSnapshot struct {
	StateStore
}

// DenormalizeAllocationsMap takes in a map of nodes to allocations, queries
// the Allocation for each allocation diff, merges the updated attributes with
// the existing Allocation, and attaches the Job provided.
func (s *StateSnapshot) DenormalizeAllocationsMap(nodeAllocations map[string][]*structs.Allocation) error {
	for nodeID, allocs := range nodeAllocations {
		denormalizedAllocs, err := s.DenormalizeAllocationSlice(allocs)
		if err != nil {
			return err
		}

		nodeAllocations[nodeID] = denormalizedAllocs
	}
	return nil
}

// DenormalizeAllocationSlice queries the Allocation for each allocation diff
// represented as an Allocation and merges the updated attributes with the existing
// Allocation, and attaches the Job provided.
//
// This should only be called on terminal allocs, particularly stopped or preempted allocs
func (s *StateSnapshot) DenormalizeAllocationSlice(allocs []*structs.Allocation) ([]*structs.Allocation, error) {
	allocDiffs := make([]*structs.AllocationDiff, len(allocs))
	for i, alloc := range allocs {
		allocDiffs[i] = alloc.AllocationDiff()
	}

	return s.DenormalizeAllocationDiffSlice(allocDiffs)
}
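
// exampleLookupGroupPolicy is an illustrative sketch, not part of the
// upstream source: it shows how a caller builds the target map that
// ScalingPolicyByTargetAndType consumes for a group-level policy.
func exampleLookupGroupPolicy(s *StateStore, ns, job, group, typ string) (*structs.ScalingPolicy, error) {
	target := map[string]string{
		structs.ScalingTargetNamespace: ns,
		structs.ScalingTargetJob:       job,
		structs.ScalingTargetGroup:     group,
		// structs.ScalingTargetTask is omitted for a group-level policy.
	}
	return s.ScalingPolicyByTargetAndType(nil, target, typ)
}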

// DenormalizeAllocationDiffSlice queries the Allocation for each AllocationDiff and merges
// the updated attributes with the existing Allocation, and attaches the Job provided.
//
// This should only be called on terminal allocs, particularly stopped or preempted allocs.
func (s *StateSnapshot) DenormalizeAllocationDiffSlice(allocDiffs []*structs.AllocationDiff) ([]*structs.Allocation, error) {
	// Output index for denormalized Allocations
	j := 0

	denormalizedAllocs := make([]*structs.Allocation, len(allocDiffs))
	for _, allocDiff := range allocDiffs {
		alloc, err := s.AllocByID(nil, allocDiff.ID)
		if err != nil {
			return nil, fmt.Errorf("alloc lookup failed: %v", err)
		}
		if alloc == nil {
			return nil, fmt.Errorf("alloc %v doesn't exist", allocDiff.ID)
		}

		// Merge the updates to the Allocation. Don't update alloc.Job for terminal allocs
		// so alloc refers to the latest Job view before destruction and to ease handler implementations
		allocCopy := alloc.Copy()

		if allocDiff.PreemptedByAllocation != "" {
			allocCopy.PreemptedByAllocation = allocDiff.PreemptedByAllocation
			allocCopy.DesiredDescription = getPreemptedAllocDesiredDescription(allocDiff.PreemptedByAllocation)
			allocCopy.DesiredStatus = structs.AllocDesiredStatusEvict
		} else {
			// If alloc is a stopped alloc
			allocCopy.DesiredDescription = allocDiff.DesiredDescription
			allocCopy.DesiredStatus = structs.AllocDesiredStatusStop
			if allocDiff.ClientStatus != "" {
				allocCopy.ClientStatus = allocDiff.ClientStatus
			}
			if allocDiff.FollowupEvalID != "" {
				allocCopy.FollowupEvalID = allocDiff.FollowupEvalID
			}
		}
		if allocDiff.ModifyTime != 0 {
			allocCopy.ModifyTime = allocDiff.ModifyTime
		}

		// Update the allocDiff in the slice to equal the denormalized alloc
		denormalizedAllocs[j] = allocCopy
		j++
	}
	// Retain only the denormalized Allocations in the slice
	denormalizedAllocs = denormalizedAllocs[:j]
	return denormalizedAllocs, nil
}

func getPreemptedAllocDesiredDescription(preemptedByAllocID string) string {
	return fmt.Sprintf("Preempted by alloc ID %v", preemptedByAllocID)
}
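
// exampleDenormalizePlan is an illustrative sketch, not part of the upstream
// source: it shows how a caller such as the plan applier might use the
// snapshot helpers above. Stopped or preempted allocations arrive as diffs,
// and the helpers merge each diff onto a copy of the allocation already in
// the snapshot. The variable names are assumptions.
func exampleDenormalizePlan(snap *StateSnapshot, nodeUpdates map[string][]*structs.Allocation) error {
	// The map is mutated in place: each entry is replaced with the merged,
	// denormalized Allocation copies.
	return snap.DenormalizeAllocationsMap(nodeUpdates)
}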
r.txn.Insert("allocs", alloc); err != nil { 6104 return fmt.Errorf("alloc insert failed: %v", err) 6105 } 6106 return nil 6107 } 6108 6109 // IndexRestore is used to restore an index 6110 func (r *StateRestore) IndexRestore(idx *IndexEntry) error { 6111 if err := r.txn.Insert("index", idx); err != nil { 6112 return fmt.Errorf("index insert failed: %v", err) 6113 } 6114 return nil 6115 } 6116 6117 // PeriodicLaunchRestore is used to restore a periodic launch. 6118 func (r *StateRestore) PeriodicLaunchRestore(launch *structs.PeriodicLaunch) error { 6119 if err := r.txn.Insert("periodic_launch", launch); err != nil { 6120 return fmt.Errorf("periodic launch insert failed: %v", err) 6121 } 6122 return nil 6123 } 6124 6125 // JobSummaryRestore is used to restore a job summary 6126 func (r *StateRestore) JobSummaryRestore(jobSummary *structs.JobSummary) error { 6127 if err := r.txn.Insert("job_summary", jobSummary); err != nil { 6128 return fmt.Errorf("job summary insert failed: %v", err) 6129 } 6130 return nil 6131 } 6132 6133 // JobVersionRestore is used to restore a job version 6134 func (r *StateRestore) JobVersionRestore(version *structs.Job) error { 6135 if err := r.txn.Insert("job_version", version); err != nil { 6136 return fmt.Errorf("job version insert failed: %v", err) 6137 } 6138 return nil 6139 } 6140 6141 // DeploymentRestore is used to restore a deployment 6142 func (r *StateRestore) DeploymentRestore(deployment *structs.Deployment) error { 6143 if err := r.txn.Insert("deployment", deployment); err != nil { 6144 return fmt.Errorf("deployment insert failed: %v", err) 6145 } 6146 return nil 6147 } 6148 6149 // VaultAccessorRestore is used to restore a vault accessor 6150 func (r *StateRestore) VaultAccessorRestore(accessor *structs.VaultAccessor) error { 6151 if err := r.txn.Insert("vault_accessors", accessor); err != nil { 6152 return fmt.Errorf("vault accessor insert failed: %v", err) 6153 } 6154 return nil 6155 } 6156 6157 // SITokenAccessorRestore is used to restore an SI token accessor 6158 func (r *StateRestore) SITokenAccessorRestore(accessor *structs.SITokenAccessor) error { 6159 if err := r.txn.Insert(siTokenAccessorTable, accessor); err != nil { 6160 return errors.Wrap(err, "si token accessor insert failed") 6161 } 6162 return nil 6163 } 6164 6165 // ACLPolicyRestore is used to restore an ACL policy 6166 func (r *StateRestore) ACLPolicyRestore(policy *structs.ACLPolicy) error { 6167 if err := r.txn.Insert("acl_policy", policy); err != nil { 6168 return fmt.Errorf("inserting acl policy failed: %v", err) 6169 } 6170 return nil 6171 } 6172 6173 // ACLTokenRestore is used to restore an ACL token 6174 func (r *StateRestore) ACLTokenRestore(token *structs.ACLToken) error { 6175 if err := r.txn.Insert("acl_token", token); err != nil { 6176 return fmt.Errorf("inserting acl token failed: %v", err) 6177 } 6178 return nil 6179 } 6180 6181 func (r *StateRestore) SchedulerConfigRestore(schedConfig *structs.SchedulerConfiguration) error { 6182 if err := r.txn.Insert("scheduler_config", schedConfig); err != nil { 6183 return fmt.Errorf("inserting scheduler config failed: %s", err) 6184 } 6185 return nil 6186 } 6187 6188 func (r *StateRestore) ClusterMetadataRestore(meta *structs.ClusterMetadata) error { 6189 if err := r.txn.Insert("cluster_meta", meta); err != nil { 6190 return fmt.Errorf("inserting cluster meta failed: %v", err) 6191 } 6192 return nil 6193 } 6194 6195 // ScalingPolicyRestore is used to restore a scaling policy 6196 func (r *StateRestore) 

// ScalingPolicyRestore is used to restore a scaling policy
func (r *StateRestore) ScalingPolicyRestore(scalingPolicy *structs.ScalingPolicy) error {
	if err := r.txn.Insert("scaling_policy", scalingPolicy); err != nil {
		return fmt.Errorf("scaling policy insert failed: %v", err)
	}
	return nil
}

// CSIPluginRestore is used to restore a CSI plugin
func (r *StateRestore) CSIPluginRestore(plugin *structs.CSIPlugin) error {
	if err := r.txn.Insert("csi_plugins", plugin); err != nil {
		return fmt.Errorf("csi plugin insert failed: %v", err)
	}
	return nil
}

// CSIVolumeRestore is used to restore a CSI volume
func (r *StateRestore) CSIVolumeRestore(volume *structs.CSIVolume) error {
	if err := r.txn.Insert("csi_volumes", volume); err != nil {
		return fmt.Errorf("csi volume insert failed: %v", err)
	}
	return nil
}

// ScalingEventsRestore is used to restore scaling events for a job
func (r *StateRestore) ScalingEventsRestore(jobEvents *structs.JobScalingEvents) error {
	if err := r.txn.Insert("scaling_event", jobEvents); err != nil {
		return fmt.Errorf("scaling event insert failed: %v", err)
	}
	return nil
}

// NamespaceRestore is used to restore a namespace
func (r *StateRestore) NamespaceRestore(ns *structs.Namespace) error {
	if err := r.txn.Insert(TableNamespaces, ns); err != nil {
		return fmt.Errorf("namespace insert failed: %v", err)
	}
	return nil
}
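
// exampleRestore is an illustrative sketch, not part of the upstream source:
// the restore helpers above are driven from snapshot restore, roughly as
// below. The constructor that produces the *StateRestore (and its single
// write transaction) is assumed to live elsewhere in this file.
func exampleRestore(r *StateRestore, nodes []*structs.Node, jobs []*structs.Job) error {
	for _, n := range nodes {
		if err := r.NodeRestore(n); err != nil {
			r.Abort()
			return err
		}
	}
	for _, j := range jobs {
		if err := r.JobRestore(j); err != nil {
			r.Abort()
			return err
		}
	}
	// One commit for the whole restore keeps it a single large transaction.
	return r.Commit()
}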