github.com/rohankumardubey/nomad@v0.11.8/nomad/fsm.go (about) 1 package nomad 2 3 import ( 4 "fmt" 5 "io" 6 "reflect" 7 "sync" 8 "time" 9 10 metrics "github.com/armon/go-metrics" 11 log "github.com/hashicorp/go-hclog" 12 memdb "github.com/hashicorp/go-memdb" 13 "github.com/hashicorp/go-msgpack/codec" 14 "github.com/hashicorp/nomad/helper/uuid" 15 "github.com/hashicorp/nomad/nomad/state" 16 "github.com/hashicorp/nomad/nomad/structs" 17 "github.com/hashicorp/nomad/scheduler" 18 "github.com/hashicorp/raft" 19 "github.com/pkg/errors" 20 ) 21 22 const ( 23 // timeTableGranularity is the granularity of index to time tracking 24 timeTableGranularity = 5 * time.Minute 25 26 // timeTableLimit is the maximum limit of our tracking 27 timeTableLimit = 72 * time.Hour 28 ) 29 30 // SnapshotType is prefixed to a record in the FSM snapshot 31 // so that we can determine the type for restore 32 type SnapshotType byte 33 34 const ( 35 NodeSnapshot SnapshotType = iota 36 JobSnapshot 37 IndexSnapshot 38 EvalSnapshot 39 AllocSnapshot 40 TimeTableSnapshot 41 PeriodicLaunchSnapshot 42 JobSummarySnapshot 43 VaultAccessorSnapshot 44 JobVersionSnapshot 45 DeploymentSnapshot 46 ACLPolicySnapshot 47 ACLTokenSnapshot 48 SchedulerConfigSnapshot 49 ClusterMetadataSnapshot 50 ServiceIdentityTokenAccessorSnapshot 51 ScalingPolicySnapshot 52 CSIPluginSnapshot 53 CSIVolumeSnapshot 54 ScalingEventsSnapshot 55 ) 56 57 // LogApplier is the definition of a function that can apply a Raft log 58 type LogApplier func(buf []byte, index uint64) interface{} 59 60 // LogAppliers is a mapping of the Raft MessageType to the appropriate log 61 // applier 62 type LogAppliers map[structs.MessageType]LogApplier 63 64 // SnapshotRestorer is the definition of a function that can apply a Raft log 65 type SnapshotRestorer func(restore *state.StateRestore, dec *codec.Decoder) error 66 67 // SnapshotRestorers is a mapping of the SnapshotType to the appropriate 68 // snapshot restorer. 
69 type SnapshotRestorers map[SnapshotType]SnapshotRestorer 70 71 // nomadFSM implements a finite state machine that is used 72 // along with Raft to provide strong consistency. We implement 73 // this outside the Server to avoid exposing this outside the package. 74 type nomadFSM struct { 75 evalBroker *EvalBroker 76 blockedEvals *BlockedEvals 77 periodicDispatcher *PeriodicDispatch 78 logger log.Logger 79 state *state.StateStore 80 timetable *TimeTable 81 82 // config is the FSM config 83 config *FSMConfig 84 85 // enterpriseAppliers holds the set of enterprise only LogAppliers 86 enterpriseAppliers LogAppliers 87 88 // enterpriseRestorers holds the set of enterprise only snapshot restorers 89 enterpriseRestorers SnapshotRestorers 90 91 // stateLock is only used to protect outside callers to State() from 92 // racing with Restore(), which is called by Raft (it puts in a totally 93 // new state store). Everything internal here is synchronized by the 94 // Raft side, so doesn't need to lock this. 95 stateLock sync.RWMutex 96 } 97 98 // nomadSnapshot is used to provide a snapshot of the current 99 // state in a way that can be accessed concurrently with operations 100 // that may modify the live state. 101 type nomadSnapshot struct { 102 snap *state.StateSnapshot 103 timetable *TimeTable 104 } 105 106 // snapshotHeader is the first entry in our snapshot 107 type snapshotHeader struct { 108 } 109 110 // FSMConfig is used to configure the FSM 111 type FSMConfig struct { 112 // EvalBroker is the evaluation broker evaluations should be added to 113 EvalBroker *EvalBroker 114 115 // Periodic is the periodic job dispatcher that periodic jobs should be 116 // added/removed from 117 Periodic *PeriodicDispatch 118 119 // BlockedEvals is the blocked eval tracker that blocked evaluations should 120 // be added to. 
121 Blocked *BlockedEvals 122 123 // Logger is the logger used by the FSM 124 Logger log.Logger 125 126 // Region is the region of the server embedding the FSM 127 Region string 128 } 129 130 // NewFSMPath is used to construct a new FSM with a blank state 131 func NewFSM(config *FSMConfig) (*nomadFSM, error) { 132 // Create a state store 133 sconfig := &state.StateStoreConfig{ 134 Logger: config.Logger, 135 Region: config.Region, 136 } 137 state, err := state.NewStateStore(sconfig) 138 if err != nil { 139 return nil, err 140 } 141 142 fsm := &nomadFSM{ 143 evalBroker: config.EvalBroker, 144 periodicDispatcher: config.Periodic, 145 blockedEvals: config.Blocked, 146 logger: config.Logger.Named("fsm"), 147 config: config, 148 state: state, 149 timetable: NewTimeTable(timeTableGranularity, timeTableLimit), 150 enterpriseAppliers: make(map[structs.MessageType]LogApplier, 8), 151 enterpriseRestorers: make(map[SnapshotType]SnapshotRestorer, 8), 152 } 153 154 // Register all the log applier functions 155 fsm.registerLogAppliers() 156 157 // Register all the snapshot restorer functions 158 fsm.registerSnapshotRestorers() 159 160 return fsm, nil 161 } 162 163 // Close is used to cleanup resources associated with the FSM 164 func (n *nomadFSM) Close() error { 165 return nil 166 } 167 168 // State is used to return a handle to the current state 169 func (n *nomadFSM) State() *state.StateStore { 170 n.stateLock.RLock() 171 defer n.stateLock.RUnlock() 172 return n.state 173 } 174 175 // TimeTable returns the time table of transactions 176 func (n *nomadFSM) TimeTable() *TimeTable { 177 return n.timetable 178 } 179 180 func (n *nomadFSM) Apply(log *raft.Log) interface{} { 181 buf := log.Data 182 msgType := structs.MessageType(buf[0]) 183 184 // Witness this write 185 n.timetable.Witness(log.Index, time.Now().UTC()) 186 187 // Check if this message type should be ignored when unknown. 
This is 188 // used so that new commands can be added with developer control if older 189 // versions can safely ignore the command, or if they should crash. 190 ignoreUnknown := false 191 if msgType&structs.IgnoreUnknownTypeFlag == structs.IgnoreUnknownTypeFlag { 192 msgType &= ^structs.IgnoreUnknownTypeFlag 193 ignoreUnknown = true 194 } 195 196 switch msgType { 197 case structs.NodeRegisterRequestType: 198 return n.applyUpsertNode(buf[1:], log.Index) 199 case structs.NodeDeregisterRequestType: 200 return n.applyDeregisterNode(buf[1:], log.Index) 201 case structs.NodeUpdateStatusRequestType: 202 return n.applyStatusUpdate(buf[1:], log.Index) 203 case structs.NodeUpdateDrainRequestType: 204 return n.applyDrainUpdate(buf[1:], log.Index) 205 case structs.JobRegisterRequestType: 206 return n.applyUpsertJob(buf[1:], log.Index) 207 case structs.JobDeregisterRequestType: 208 return n.applyDeregisterJob(buf[1:], log.Index) 209 case structs.EvalUpdateRequestType: 210 return n.applyUpdateEval(buf[1:], log.Index) 211 case structs.EvalDeleteRequestType: 212 return n.applyDeleteEval(buf[1:], log.Index) 213 case structs.AllocUpdateRequestType: 214 return n.applyAllocUpdate(buf[1:], log.Index) 215 case structs.AllocClientUpdateRequestType: 216 return n.applyAllocClientUpdate(buf[1:], log.Index) 217 case structs.ReconcileJobSummariesRequestType: 218 return n.applyReconcileSummaries(buf[1:], log.Index) 219 case structs.VaultAccessorRegisterRequestType: 220 return n.applyUpsertVaultAccessor(buf[1:], log.Index) 221 case structs.VaultAccessorDeregisterRequestType: 222 return n.applyDeregisterVaultAccessor(buf[1:], log.Index) 223 case structs.ApplyPlanResultsRequestType: 224 return n.applyPlanResults(buf[1:], log.Index) 225 case structs.DeploymentStatusUpdateRequestType: 226 return n.applyDeploymentStatusUpdate(buf[1:], log.Index) 227 case structs.DeploymentPromoteRequestType: 228 return n.applyDeploymentPromotion(buf[1:], log.Index) 229 case structs.DeploymentAllocHealthRequestType: 
230 return n.applyDeploymentAllocHealth(buf[1:], log.Index) 231 case structs.DeploymentDeleteRequestType: 232 return n.applyDeploymentDelete(buf[1:], log.Index) 233 case structs.JobStabilityRequestType: 234 return n.applyJobStability(buf[1:], log.Index) 235 case structs.ACLPolicyUpsertRequestType: 236 return n.applyACLPolicyUpsert(buf[1:], log.Index) 237 case structs.ACLPolicyDeleteRequestType: 238 return n.applyACLPolicyDelete(buf[1:], log.Index) 239 case structs.ACLTokenUpsertRequestType: 240 return n.applyACLTokenUpsert(buf[1:], log.Index) 241 case structs.ACLTokenDeleteRequestType: 242 return n.applyACLTokenDelete(buf[1:], log.Index) 243 case structs.ACLTokenBootstrapRequestType: 244 return n.applyACLTokenBootstrap(buf[1:], log.Index) 245 case structs.AutopilotRequestType: 246 return n.applyAutopilotUpdate(buf[1:], log.Index) 247 case structs.UpsertNodeEventsType: 248 return n.applyUpsertNodeEvent(buf[1:], log.Index) 249 case structs.JobBatchDeregisterRequestType: 250 return n.applyBatchDeregisterJob(buf[1:], log.Index) 251 case structs.AllocUpdateDesiredTransitionRequestType: 252 return n.applyAllocUpdateDesiredTransition(buf[1:], log.Index) 253 case structs.NodeUpdateEligibilityRequestType: 254 return n.applyNodeEligibilityUpdate(buf[1:], log.Index) 255 case structs.BatchNodeUpdateDrainRequestType: 256 return n.applyBatchDrainUpdate(buf[1:], log.Index) 257 case structs.SchedulerConfigRequestType: 258 return n.applySchedulerConfigUpdate(buf[1:], log.Index) 259 case structs.NodeBatchDeregisterRequestType: 260 return n.applyDeregisterNodeBatch(buf[1:], log.Index) 261 case structs.ClusterMetadataRequestType: 262 return n.applyClusterMetadata(buf[1:], log.Index) 263 case structs.ServiceIdentityAccessorRegisterRequestType: 264 return n.applyUpsertSIAccessor(buf[1:], log.Index) 265 case structs.ServiceIdentityAccessorDeregisterRequestType: 266 return n.applyDeregisterSIAccessor(buf[1:], log.Index) 267 case structs.CSIVolumeRegisterRequestType: 268 return 
n.applyCSIVolumeRegister(buf[1:], log.Index) 269 case structs.CSIVolumeDeregisterRequestType: 270 return n.applyCSIVolumeDeregister(buf[1:], log.Index) 271 case structs.CSIVolumeClaimRequestType: 272 return n.applyCSIVolumeClaim(buf[1:], log.Index) 273 case structs.ScalingEventRegisterRequestType: 274 return n.applyUpsertScalingEvent(buf[1:], log.Index) 275 case structs.CSIVolumeClaimBatchRequestType: 276 return n.applyCSIVolumeBatchClaim(buf[1:], log.Index) 277 case structs.CSIPluginDeleteRequestType: 278 return n.applyCSIPluginDelete(buf[1:], log.Index) 279 } 280 281 // Check enterprise only message types. 282 if applier, ok := n.enterpriseAppliers[msgType]; ok { 283 return applier(buf[1:], log.Index) 284 } 285 286 // We didn't match anything, either panic or ignore 287 if ignoreUnknown { 288 n.logger.Warn("ignoring unknown message type, upgrade to newer version", "msg_type", msgType) 289 return nil 290 } 291 292 panic(fmt.Errorf("failed to apply request: %#v", buf)) 293 } 294 295 func (n *nomadFSM) applyClusterMetadata(buf []byte, index uint64) interface{} { 296 defer metrics.MeasureSince([]string{"nomad", "fsm", "cluster_meta"}, time.Now()) 297 298 var req structs.ClusterMetadata 299 if err := structs.Decode(buf, &req); err != nil { 300 panic(fmt.Errorf("failed to decode request: %v", err)) 301 } 302 303 if err := n.state.ClusterSetMetadata(index, &req); err != nil { 304 n.logger.Error("ClusterSetMetadata failed", "error", err) 305 return err 306 } 307 308 n.logger.Trace("ClusterSetMetadata", "cluster_id", req.ClusterID, "create_time", req.CreateTime) 309 310 return nil 311 } 312 313 func (n *nomadFSM) applyUpsertNode(buf []byte, index uint64) interface{} { 314 defer metrics.MeasureSince([]string{"nomad", "fsm", "register_node"}, time.Now()) 315 var req structs.NodeRegisterRequest 316 if err := structs.Decode(buf, &req); err != nil { 317 panic(fmt.Errorf("failed to decode request: %v", err)) 318 } 319 320 // Handle upgrade paths 321 req.Node.Canonicalize() 322 
323 if err := n.state.UpsertNode(index, req.Node); err != nil { 324 n.logger.Error("UpsertNode failed", "error", err) 325 return err 326 } 327 328 // Unblock evals for the nodes computed node class if it is in a ready 329 // state. 330 if req.Node.Status == structs.NodeStatusReady { 331 n.blockedEvals.Unblock(req.Node.ComputedClass, index) 332 } 333 334 return nil 335 } 336 337 func (n *nomadFSM) applyDeregisterNode(buf []byte, index uint64) interface{} { 338 defer metrics.MeasureSince([]string{"nomad", "fsm", "deregister_node"}, time.Now()) 339 var req structs.NodeDeregisterRequest 340 if err := structs.Decode(buf, &req); err != nil { 341 panic(fmt.Errorf("failed to decode request: %v", err)) 342 } 343 344 if err := n.state.DeleteNode(index, []string{req.NodeID}); err != nil { 345 n.logger.Error("DeleteNode failed", "error", err) 346 return err 347 } 348 349 return nil 350 } 351 352 func (n *nomadFSM) applyDeregisterNodeBatch(buf []byte, index uint64) interface{} { 353 defer metrics.MeasureSince([]string{"nomad", "fsm", "batch_deregister_node"}, time.Now()) 354 var req structs.NodeBatchDeregisterRequest 355 if err := structs.Decode(buf, &req); err != nil { 356 panic(fmt.Errorf("failed to decode request: %v", err)) 357 } 358 359 if err := n.state.DeleteNode(index, req.NodeIDs); err != nil { 360 n.logger.Error("DeleteNode failed", "error", err) 361 return err 362 } 363 364 return nil 365 } 366 367 func (n *nomadFSM) applyStatusUpdate(buf []byte, index uint64) interface{} { 368 defer metrics.MeasureSince([]string{"nomad", "fsm", "node_status_update"}, time.Now()) 369 var req structs.NodeUpdateStatusRequest 370 if err := structs.Decode(buf, &req); err != nil { 371 panic(fmt.Errorf("failed to decode request: %v", err)) 372 } 373 374 if err := n.state.UpdateNodeStatus(index, req.NodeID, req.Status, req.UpdatedAt, req.NodeEvent); err != nil { 375 n.logger.Error("UpdateNodeStatus failed", "error", err) 376 return err 377 } 378 379 // Unblock evals for the nodes computed 
node class if it is in a ready 380 // state. 381 if req.Status == structs.NodeStatusReady { 382 ws := memdb.NewWatchSet() 383 node, err := n.state.NodeByID(ws, req.NodeID) 384 if err != nil { 385 n.logger.Error("looking up node failed", "node_id", req.NodeID, "error", err) 386 return err 387 388 } 389 n.blockedEvals.Unblock(node.ComputedClass, index) 390 n.blockedEvals.UnblockNode(req.NodeID, index) 391 } 392 393 return nil 394 } 395 396 func (n *nomadFSM) applyDrainUpdate(buf []byte, index uint64) interface{} { 397 defer metrics.MeasureSince([]string{"nomad", "fsm", "node_drain_update"}, time.Now()) 398 var req structs.NodeUpdateDrainRequest 399 if err := structs.Decode(buf, &req); err != nil { 400 panic(fmt.Errorf("failed to decode request: %v", err)) 401 } 402 403 // COMPAT Remove in version 0.10 404 // As part of Nomad 0.8 we have deprecated the drain boolean in favor of a 405 // drain strategy but we need to handle the upgrade path where the Raft log 406 // contains drain updates with just the drain boolean being manipulated. 407 if req.Drain && req.DrainStrategy == nil { 408 // Mark the drain strategy as a force to imitate the old style drain 409 // functionality. 
410 req.DrainStrategy = &structs.DrainStrategy{ 411 DrainSpec: structs.DrainSpec{ 412 Deadline: -1 * time.Second, 413 }, 414 } 415 } 416 417 if err := n.state.UpdateNodeDrain(index, req.NodeID, req.DrainStrategy, req.MarkEligible, req.UpdatedAt, req.NodeEvent); err != nil { 418 n.logger.Error("UpdateNodeDrain failed", "error", err) 419 return err 420 } 421 return nil 422 } 423 424 func (n *nomadFSM) applyBatchDrainUpdate(buf []byte, index uint64) interface{} { 425 defer metrics.MeasureSince([]string{"nomad", "fsm", "batch_node_drain_update"}, time.Now()) 426 var req structs.BatchNodeUpdateDrainRequest 427 if err := structs.Decode(buf, &req); err != nil { 428 panic(fmt.Errorf("failed to decode request: %v", err)) 429 } 430 431 if err := n.state.BatchUpdateNodeDrain(index, req.UpdatedAt, req.Updates, req.NodeEvents); err != nil { 432 n.logger.Error("BatchUpdateNodeDrain failed", "error", err) 433 return err 434 } 435 return nil 436 } 437 438 func (n *nomadFSM) applyNodeEligibilityUpdate(buf []byte, index uint64) interface{} { 439 defer metrics.MeasureSince([]string{"nomad", "fsm", "node_eligibility_update"}, time.Now()) 440 var req structs.NodeUpdateEligibilityRequest 441 if err := structs.Decode(buf, &req); err != nil { 442 panic(fmt.Errorf("failed to decode request: %v", err)) 443 } 444 445 // Lookup the existing node 446 node, err := n.state.NodeByID(nil, req.NodeID) 447 if err != nil { 448 n.logger.Error("UpdateNodeEligibility failed to lookup node", "node_id", req.NodeID, "error", err) 449 return err 450 } 451 452 if err := n.state.UpdateNodeEligibility(index, req.NodeID, req.Eligibility, req.UpdatedAt, req.NodeEvent); err != nil { 453 n.logger.Error("UpdateNodeEligibility failed", "error", err) 454 return err 455 } 456 457 // Unblock evals for the nodes computed node class if it is in a ready 458 // state. 
459 if node != nil && node.SchedulingEligibility == structs.NodeSchedulingIneligible && 460 req.Eligibility == structs.NodeSchedulingEligible { 461 n.blockedEvals.Unblock(node.ComputedClass, index) 462 n.blockedEvals.UnblockNode(req.NodeID, index) 463 } 464 465 return nil 466 } 467 468 func (n *nomadFSM) applyUpsertJob(buf []byte, index uint64) interface{} { 469 defer metrics.MeasureSince([]string{"nomad", "fsm", "register_job"}, time.Now()) 470 var req structs.JobRegisterRequest 471 if err := structs.Decode(buf, &req); err != nil { 472 panic(fmt.Errorf("failed to decode request: %v", err)) 473 } 474 475 /* Handle upgrade paths: 476 * - Empty maps and slices should be treated as nil to avoid 477 * un-intended destructive updates in scheduler since we use 478 * reflect.DeepEqual. Starting Nomad 0.4.1, job submission sanitizes 479 * the incoming job. 480 * - Migrate from old style upgrade stanza that used only a stagger. 481 */ 482 req.Job.Canonicalize() 483 484 if err := n.state.UpsertJob(index, req.Job); err != nil { 485 n.logger.Error("UpsertJob failed", "error", err) 486 return err 487 } 488 489 // We always add the job to the periodic dispatcher because there is the 490 // possibility that the periodic spec was removed and then we should stop 491 // tracking it. 492 if err := n.periodicDispatcher.Add(req.Job); err != nil { 493 n.logger.Error("periodicDispatcher.Add failed", "error", err) 494 return fmt.Errorf("failed adding job to periodic dispatcher: %v", err) 495 } 496 497 // Create a watch set 498 ws := memdb.NewWatchSet() 499 500 // If it is an active periodic job, record the time it was inserted. This is 501 // necessary for recovering during leader election. It is possible that from 502 // the time it is added to when it was suppose to launch, leader election 503 // occurs and the job was not launched. In this case, we use the insertion 504 // time to determine if a launch was missed. 
505 if req.Job.IsPeriodicActive() { 506 prevLaunch, err := n.state.PeriodicLaunchByID(ws, req.Namespace, req.Job.ID) 507 if err != nil { 508 n.logger.Error("PeriodicLaunchByID failed", "error", err) 509 return err 510 } 511 512 // Record the insertion time as a launch. We overload the launch table 513 // such that the first entry is the insertion time. 514 if prevLaunch == nil { 515 launch := &structs.PeriodicLaunch{ 516 ID: req.Job.ID, 517 Namespace: req.Namespace, 518 Launch: time.Now(), 519 } 520 if err := n.state.UpsertPeriodicLaunch(index, launch); err != nil { 521 n.logger.Error("UpsertPeriodicLaunch failed", "error", err) 522 return err 523 } 524 } 525 } 526 527 // Check if the parent job is periodic and mark the launch time. 528 parentID := req.Job.ParentID 529 if parentID != "" { 530 parent, err := n.state.JobByID(ws, req.Namespace, parentID) 531 if err != nil { 532 n.logger.Error("JobByID lookup for parent failed", "parent_id", parentID, "namespace", req.Namespace, "error", err) 533 return err 534 } else if parent == nil { 535 // The parent has been deregistered. 
536 return nil 537 } 538 539 if parent.IsPeriodic() && !parent.IsParameterized() { 540 t, err := n.periodicDispatcher.LaunchTime(req.Job.ID) 541 if err != nil { 542 n.logger.Error("LaunchTime failed", "job", req.Job.NamespacedID(), "error", err) 543 return err 544 } 545 546 launch := &structs.PeriodicLaunch{ 547 ID: parentID, 548 Namespace: req.Namespace, 549 Launch: t, 550 } 551 if err := n.state.UpsertPeriodicLaunch(index, launch); err != nil { 552 n.logger.Error("UpsertPeriodicLaunch failed", "error", err) 553 return err 554 } 555 } 556 } 557 558 return nil 559 } 560 561 func (n *nomadFSM) applyDeregisterJob(buf []byte, index uint64) interface{} { 562 defer metrics.MeasureSince([]string{"nomad", "fsm", "deregister_job"}, time.Now()) 563 var req structs.JobDeregisterRequest 564 if err := structs.Decode(buf, &req); err != nil { 565 panic(fmt.Errorf("failed to decode request: %v", err)) 566 } 567 568 return n.state.WithWriteTransaction(func(tx state.Txn) error { 569 if err := n.handleJobDeregister(index, req.JobID, req.Namespace, req.Purge, tx); err != nil { 570 n.logger.Error("deregistering job failed", "error", err) 571 return err 572 } 573 574 return nil 575 }) 576 } 577 578 func (n *nomadFSM) applyBatchDeregisterJob(buf []byte, index uint64) interface{} { 579 defer metrics.MeasureSince([]string{"nomad", "fsm", "batch_deregister_job"}, time.Now()) 580 var req structs.JobBatchDeregisterRequest 581 if err := structs.Decode(buf, &req); err != nil { 582 panic(fmt.Errorf("failed to decode request: %v", err)) 583 } 584 585 // Perform all store updates atomically to ensure a consistent view for store readers. 586 // A partial update may increment the snapshot index, allowing eval brokers to process 587 // evals for jobs whose deregistering didn't get committed yet. 
588 err := n.state.WithWriteTransaction(func(tx state.Txn) error { 589 for jobNS, options := range req.Jobs { 590 if err := n.handleJobDeregister(index, jobNS.ID, jobNS.Namespace, options.Purge, tx); err != nil { 591 n.logger.Error("deregistering job failed", "job", jobNS, "error", err) 592 return err 593 } 594 } 595 596 if err := n.state.UpsertEvalsTxn(index, req.Evals, tx); err != nil { 597 n.logger.Error("UpsertEvals failed", "error", err) 598 return err 599 } 600 601 return nil 602 }) 603 604 if err != nil { 605 return err 606 } 607 608 // perform the side effects outside the transactions 609 n.handleUpsertedEvals(req.Evals) 610 return nil 611 } 612 613 // handleJobDeregister is used to deregister a job. 614 func (n *nomadFSM) handleJobDeregister(index uint64, jobID, namespace string, purge bool, tx state.Txn) error { 615 // If it is periodic remove it from the dispatcher 616 if err := n.periodicDispatcher.Remove(namespace, jobID); err != nil { 617 n.logger.Error("periodicDispatcher.Remove failed", "error", err) 618 return err 619 } 620 621 if purge { 622 if err := n.state.DeleteJobTxn(index, namespace, jobID, tx); err != nil { 623 n.logger.Error("DeleteJob failed", "error", err) 624 return err 625 } 626 627 // We always delete from the periodic launch table because it is possible that 628 // the job was updated to be non-periodic, thus checking if it is periodic 629 // doesn't ensure we clean it up properly. 630 n.state.DeletePeriodicLaunchTxn(index, namespace, jobID, tx) 631 } else { 632 // Get the current job and mark it as stopped and re-insert it. 
633 ws := memdb.NewWatchSet() 634 current, err := n.state.JobByIDTxn(ws, namespace, jobID, tx) 635 if err != nil { 636 n.logger.Error("JobByID lookup failed", "error", err) 637 return err 638 } 639 640 if current == nil { 641 return fmt.Errorf("job %q in namespace %q doesn't exist to be deregistered", jobID, namespace) 642 } 643 644 stopped := current.Copy() 645 stopped.Stop = true 646 647 if err := n.state.UpsertJobTxn(index, stopped, tx); err != nil { 648 n.logger.Error("UpsertJob failed", "error", err) 649 return err 650 } 651 } 652 653 return nil 654 } 655 656 func (n *nomadFSM) applyUpdateEval(buf []byte, index uint64) interface{} { 657 defer metrics.MeasureSince([]string{"nomad", "fsm", "update_eval"}, time.Now()) 658 var req structs.EvalUpdateRequest 659 if err := structs.Decode(buf, &req); err != nil { 660 panic(fmt.Errorf("failed to decode request: %v", err)) 661 } 662 return n.upsertEvals(index, req.Evals) 663 } 664 665 func (n *nomadFSM) upsertEvals(index uint64, evals []*structs.Evaluation) error { 666 if err := n.state.UpsertEvals(index, evals); err != nil { 667 n.logger.Error("UpsertEvals failed", "error", err) 668 return err 669 } 670 671 n.handleUpsertedEvals(evals) 672 return nil 673 } 674 675 // handleUpsertingEval is a helper for taking action after upserting 676 // evaluations. 677 func (n *nomadFSM) handleUpsertedEvals(evals []*structs.Evaluation) { 678 for _, eval := range evals { 679 n.handleUpsertedEval(eval) 680 } 681 } 682 683 // handleUpsertingEval is a helper for taking action after upserting an eval. 
684 func (n *nomadFSM) handleUpsertedEval(eval *structs.Evaluation) { 685 if eval == nil { 686 return 687 } 688 689 if eval.ShouldEnqueue() { 690 n.evalBroker.Enqueue(eval) 691 } else if eval.ShouldBlock() { 692 n.blockedEvals.Block(eval) 693 } else if eval.Status == structs.EvalStatusComplete && 694 len(eval.FailedTGAllocs) == 0 { 695 // If we have a successful evaluation for a node, untrack any 696 // blocked evaluation 697 n.blockedEvals.Untrack(eval.JobID, eval.Namespace) 698 } 699 } 700 701 func (n *nomadFSM) applyDeleteEval(buf []byte, index uint64) interface{} { 702 defer metrics.MeasureSince([]string{"nomad", "fsm", "delete_eval"}, time.Now()) 703 var req structs.EvalDeleteRequest 704 if err := structs.Decode(buf, &req); err != nil { 705 panic(fmt.Errorf("failed to decode request: %v", err)) 706 } 707 708 if err := n.state.DeleteEval(index, req.Evals, req.Allocs); err != nil { 709 n.logger.Error("DeleteEval failed", "error", err) 710 return err 711 } 712 return nil 713 } 714 715 func (n *nomadFSM) applyAllocUpdate(buf []byte, index uint64) interface{} { 716 defer metrics.MeasureSince([]string{"nomad", "fsm", "alloc_update"}, time.Now()) 717 var req structs.AllocUpdateRequest 718 if err := structs.Decode(buf, &req); err != nil { 719 panic(fmt.Errorf("failed to decode request: %v", err)) 720 } 721 722 // Attach the job to all the allocations. It is pulled out in the 723 // payload to avoid the redundancy of encoding, but should be denormalized 724 // prior to being inserted into MemDB. 725 structs.DenormalizeAllocationJobs(req.Job, req.Alloc) 726 727 for _, alloc := range req.Alloc { 728 // COMPAT(0.11): Remove in 0.11 729 // Calculate the total resources of allocations. It is pulled out in the 730 // payload to avoid encoding something that can be computed, but should be 731 // denormalized prior to being inserted into MemDB. 
732 if alloc.Resources == nil { 733 alloc.Resources = new(structs.Resources) 734 for _, task := range alloc.TaskResources { 735 alloc.Resources.Add(task) 736 } 737 738 // Add the shared resources 739 alloc.Resources.Add(alloc.SharedResources) 740 } 741 742 // Handle upgrade path 743 alloc.Canonicalize() 744 } 745 746 if err := n.state.UpsertAllocs(index, req.Alloc); err != nil { 747 n.logger.Error("UpsertAllocs failed", "error", err) 748 return err 749 } 750 return nil 751 } 752 753 func (n *nomadFSM) applyAllocClientUpdate(buf []byte, index uint64) interface{} { 754 defer metrics.MeasureSince([]string{"nomad", "fsm", "alloc_client_update"}, time.Now()) 755 var req structs.AllocUpdateRequest 756 if err := structs.Decode(buf, &req); err != nil { 757 panic(fmt.Errorf("failed to decode request: %v", err)) 758 } 759 if len(req.Alloc) == 0 { 760 return nil 761 } 762 763 // Create a watch set 764 ws := memdb.NewWatchSet() 765 766 // Updating the allocs with the job id and task group name 767 for _, alloc := range req.Alloc { 768 if existing, _ := n.state.AllocByID(ws, alloc.ID); existing != nil { 769 alloc.JobID = existing.JobID 770 alloc.TaskGroup = existing.TaskGroup 771 } 772 } 773 774 // Update all the client allocations 775 if err := n.state.UpdateAllocsFromClient(index, req.Alloc); err != nil { 776 n.logger.Error("UpdateAllocFromClient failed", "error", err) 777 return err 778 } 779 780 // Update any evals 781 if len(req.Evals) > 0 { 782 if err := n.upsertEvals(index, req.Evals); err != nil { 783 n.logger.Error("applyAllocClientUpdate failed to update evaluations", "error", err) 784 return err 785 } 786 } 787 788 // Unblock evals for the nodes computed node class if the client has 789 // finished running an allocation. 
790 for _, alloc := range req.Alloc { 791 if alloc.ClientStatus == structs.AllocClientStatusComplete || 792 alloc.ClientStatus == structs.AllocClientStatusFailed { 793 nodeID := alloc.NodeID 794 node, err := n.state.NodeByID(ws, nodeID) 795 if err != nil || node == nil { 796 n.logger.Error("looking up node failed", "node_id", nodeID, "error", err) 797 return err 798 799 } 800 801 // Unblock any associated quota 802 quota, err := n.allocQuota(alloc.ID) 803 if err != nil { 804 n.logger.Error("looking up quota associated with alloc failed", "alloc_id", alloc.ID, "error", err) 805 return err 806 } 807 808 n.blockedEvals.UnblockClassAndQuota(node.ComputedClass, quota, index) 809 n.blockedEvals.UnblockNode(node.ID, index) 810 } 811 } 812 813 return nil 814 } 815 816 // applyAllocUpdateDesiredTransition is used to update the desired transitions 817 // of a set of allocations. 818 func (n *nomadFSM) applyAllocUpdateDesiredTransition(buf []byte, index uint64) interface{} { 819 defer metrics.MeasureSince([]string{"nomad", "fsm", "alloc_update_desired_transition"}, time.Now()) 820 var req structs.AllocUpdateDesiredTransitionRequest 821 if err := structs.Decode(buf, &req); err != nil { 822 panic(fmt.Errorf("failed to decode request: %v", err)) 823 } 824 825 if err := n.state.UpdateAllocsDesiredTransitions(index, req.Allocs, req.Evals); err != nil { 826 n.logger.Error("UpdateAllocsDesiredTransitions failed", "error", err) 827 return err 828 } 829 830 n.handleUpsertedEvals(req.Evals) 831 return nil 832 } 833 834 // applyReconcileSummaries reconciles summaries for all the jobs 835 func (n *nomadFSM) applyReconcileSummaries(buf []byte, index uint64) interface{} { 836 if err := n.state.ReconcileJobSummaries(index); err != nil { 837 return err 838 } 839 return n.reconcileQueuedAllocations(index) 840 } 841 842 // applyUpsertNodeEvent tracks the given node events. 
843 func (n *nomadFSM) applyUpsertNodeEvent(buf []byte, index uint64) interface{} { 844 defer metrics.MeasureSince([]string{"nomad", "fsm", "upsert_node_events"}, time.Now()) 845 var req structs.EmitNodeEventsRequest 846 if err := structs.Decode(buf, &req); err != nil { 847 panic(fmt.Errorf("failed to decode EmitNodeEventsRequest: %v", err)) 848 } 849 850 if err := n.state.UpsertNodeEvents(index, req.NodeEvents); err != nil { 851 n.logger.Error("failed to add node events", "error", err) 852 return err 853 } 854 855 return nil 856 } 857 858 // applyUpsertVaultAccessor stores the Vault accessors for a given allocation 859 // and task 860 func (n *nomadFSM) applyUpsertVaultAccessor(buf []byte, index uint64) interface{} { 861 defer metrics.MeasureSince([]string{"nomad", "fsm", "upsert_vault_accessor"}, time.Now()) 862 var req structs.VaultAccessorsRequest 863 if err := structs.Decode(buf, &req); err != nil { 864 panic(fmt.Errorf("failed to decode request: %v", err)) 865 } 866 867 if err := n.state.UpsertVaultAccessor(index, req.Accessors); err != nil { 868 n.logger.Error("UpsertVaultAccessor failed", "error", err) 869 return err 870 } 871 872 return nil 873 } 874 875 // applyDeregisterVaultAccessor deregisters a set of Vault accessors 876 func (n *nomadFSM) applyDeregisterVaultAccessor(buf []byte, index uint64) interface{} { 877 defer metrics.MeasureSince([]string{"nomad", "fsm", "deregister_vault_accessor"}, time.Now()) 878 var req structs.VaultAccessorsRequest 879 if err := structs.Decode(buf, &req); err != nil { 880 panic(fmt.Errorf("failed to decode request: %v", err)) 881 } 882 883 if err := n.state.DeleteVaultAccessors(index, req.Accessors); err != nil { 884 n.logger.Error("DeregisterVaultAccessor failed", "error", err) 885 return err 886 } 887 888 return nil 889 } 890 891 func (n *nomadFSM) applyUpsertSIAccessor(buf []byte, index uint64) interface{} { 892 defer metrics.MeasureSince([]string{"nomad", "fsm", "upsert_si_accessor"}, time.Now()) 893 var request 
structs.SITokenAccessorsRequest 894 if err := structs.Decode(buf, &request); err != nil { 895 panic(errors.Wrap(err, "failed to decode request")) 896 } 897 898 if err := n.state.UpsertSITokenAccessors(index, request.Accessors); err != nil { 899 n.logger.Error("UpsertSITokenAccessors failed", "error", err) 900 return err 901 } 902 903 return nil 904 } 905 906 func (n *nomadFSM) applyDeregisterSIAccessor(buf []byte, index uint64) interface{} { 907 defer metrics.MeasureSince([]string{"nomad", "fsm", "deregister_si_accessor"}, time.Now()) 908 var request structs.SITokenAccessorsRequest 909 if err := structs.Decode(buf, &request); err != nil { 910 panic(errors.Wrap(err, "failed to decode request")) 911 } 912 913 if err := n.state.DeleteSITokenAccessors(index, request.Accessors); err != nil { 914 n.logger.Error("DeregisterSITokenAccessor failed", "error", err) 915 return err 916 } 917 918 return nil 919 } 920 921 // applyPlanApply applies the results of a plan application 922 func (n *nomadFSM) applyPlanResults(buf []byte, index uint64) interface{} { 923 defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_plan_results"}, time.Now()) 924 var req structs.ApplyPlanResultsRequest 925 if err := structs.Decode(buf, &req); err != nil { 926 panic(fmt.Errorf("failed to decode request: %v", err)) 927 } 928 929 if err := n.state.UpsertPlanResults(index, &req); err != nil { 930 n.logger.Error("ApplyPlan failed", "error", err) 931 return err 932 } 933 934 // Add evals for jobs that were preempted 935 n.handleUpsertedEvals(req.PreemptionEvals) 936 return nil 937 } 938 939 // applyDeploymentStatusUpdate is used to update the status of an existing 940 // deployment 941 func (n *nomadFSM) applyDeploymentStatusUpdate(buf []byte, index uint64) interface{} { 942 defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_deployment_status_update"}, time.Now()) 943 var req structs.DeploymentStatusUpdateRequest 944 if err := structs.Decode(buf, &req); err != nil { 945 
panic(fmt.Errorf("failed to decode request: %v", err)) 946 } 947 948 if err := n.state.UpdateDeploymentStatus(index, &req); err != nil { 949 n.logger.Error("UpsertDeploymentStatusUpdate failed", "error", err) 950 return err 951 } 952 953 n.handleUpsertedEval(req.Eval) 954 return nil 955 } 956 957 // applyDeploymentPromotion is used to promote canaries in a deployment 958 func (n *nomadFSM) applyDeploymentPromotion(buf []byte, index uint64) interface{} { 959 defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_deployment_promotion"}, time.Now()) 960 var req structs.ApplyDeploymentPromoteRequest 961 if err := structs.Decode(buf, &req); err != nil { 962 panic(fmt.Errorf("failed to decode request: %v", err)) 963 } 964 965 if err := n.state.UpdateDeploymentPromotion(index, &req); err != nil { 966 n.logger.Error("UpsertDeploymentPromotion failed", "error", err) 967 return err 968 } 969 970 n.handleUpsertedEval(req.Eval) 971 return nil 972 } 973 974 // applyDeploymentAllocHealth is used to set the health of allocations as part 975 // of a deployment 976 func (n *nomadFSM) applyDeploymentAllocHealth(buf []byte, index uint64) interface{} { 977 defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_deployment_alloc_health"}, time.Now()) 978 var req structs.ApplyDeploymentAllocHealthRequest 979 if err := structs.Decode(buf, &req); err != nil { 980 panic(fmt.Errorf("failed to decode request: %v", err)) 981 } 982 983 if err := n.state.UpdateDeploymentAllocHealth(index, &req); err != nil { 984 n.logger.Error("UpsertDeploymentAllocHealth failed", "error", err) 985 return err 986 } 987 988 n.handleUpsertedEval(req.Eval) 989 return nil 990 } 991 992 // applyDeploymentDelete is used to delete a set of deployments 993 func (n *nomadFSM) applyDeploymentDelete(buf []byte, index uint64) interface{} { 994 defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_deployment_delete"}, time.Now()) 995 var req structs.DeploymentDeleteRequest 996 if err := structs.Decode(buf, 
&req); err != nil { 997 panic(fmt.Errorf("failed to decode request: %v", err)) 998 } 999 1000 if err := n.state.DeleteDeployment(index, req.Deployments); err != nil { 1001 n.logger.Error("DeleteDeployment failed", "error", err) 1002 return err 1003 } 1004 1005 return nil 1006 } 1007 1008 // applyJobStability is used to set the stability of a job 1009 func (n *nomadFSM) applyJobStability(buf []byte, index uint64) interface{} { 1010 defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_job_stability"}, time.Now()) 1011 var req structs.JobStabilityRequest 1012 if err := structs.Decode(buf, &req); err != nil { 1013 panic(fmt.Errorf("failed to decode request: %v", err)) 1014 } 1015 1016 if err := n.state.UpdateJobStability(index, req.Namespace, req.JobID, req.JobVersion, req.Stable); err != nil { 1017 n.logger.Error("UpdateJobStability failed", "error", err) 1018 return err 1019 } 1020 1021 return nil 1022 } 1023 1024 // applyACLPolicyUpsert is used to upsert a set of policies 1025 func (n *nomadFSM) applyACLPolicyUpsert(buf []byte, index uint64) interface{} { 1026 defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_acl_policy_upsert"}, time.Now()) 1027 var req structs.ACLPolicyUpsertRequest 1028 if err := structs.Decode(buf, &req); err != nil { 1029 panic(fmt.Errorf("failed to decode request: %v", err)) 1030 } 1031 1032 if err := n.state.UpsertACLPolicies(index, req.Policies); err != nil { 1033 n.logger.Error("UpsertACLPolicies failed", "error", err) 1034 return err 1035 } 1036 return nil 1037 } 1038 1039 // applyACLPolicyDelete is used to delete a set of policies 1040 func (n *nomadFSM) applyACLPolicyDelete(buf []byte, index uint64) interface{} { 1041 defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_acl_policy_delete"}, time.Now()) 1042 var req structs.ACLPolicyDeleteRequest 1043 if err := structs.Decode(buf, &req); err != nil { 1044 panic(fmt.Errorf("failed to decode request: %v", err)) 1045 } 1046 1047 if err := n.state.DeleteACLPolicies(index, 
req.Names); err != nil { 1048 n.logger.Error("DeleteACLPolicies failed", "error", err) 1049 return err 1050 } 1051 return nil 1052 } 1053 1054 // applyACLTokenUpsert is used to upsert a set of policies 1055 func (n *nomadFSM) applyACLTokenUpsert(buf []byte, index uint64) interface{} { 1056 defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_acl_token_upsert"}, time.Now()) 1057 var req structs.ACLTokenUpsertRequest 1058 if err := structs.Decode(buf, &req); err != nil { 1059 panic(fmt.Errorf("failed to decode request: %v", err)) 1060 } 1061 1062 if err := n.state.UpsertACLTokens(index, req.Tokens); err != nil { 1063 n.logger.Error("UpsertACLTokens failed", "error", err) 1064 return err 1065 } 1066 return nil 1067 } 1068 1069 // applyACLTokenDelete is used to delete a set of policies 1070 func (n *nomadFSM) applyACLTokenDelete(buf []byte, index uint64) interface{} { 1071 defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_acl_token_delete"}, time.Now()) 1072 var req structs.ACLTokenDeleteRequest 1073 if err := structs.Decode(buf, &req); err != nil { 1074 panic(fmt.Errorf("failed to decode request: %v", err)) 1075 } 1076 1077 if err := n.state.DeleteACLTokens(index, req.AccessorIDs); err != nil { 1078 n.logger.Error("DeleteACLTokens failed", "error", err) 1079 return err 1080 } 1081 return nil 1082 } 1083 1084 // applyACLTokenBootstrap is used to bootstrap an ACL token 1085 func (n *nomadFSM) applyACLTokenBootstrap(buf []byte, index uint64) interface{} { 1086 defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_acl_token_bootstrap"}, time.Now()) 1087 var req structs.ACLTokenBootstrapRequest 1088 if err := structs.Decode(buf, &req); err != nil { 1089 panic(fmt.Errorf("failed to decode request: %v", err)) 1090 } 1091 1092 if err := n.state.BootstrapACLTokens(index, req.ResetIndex, req.Token); err != nil { 1093 n.logger.Error("BootstrapACLToken failed", "error", err) 1094 return err 1095 } 1096 return nil 1097 } 1098 1099 func (n *nomadFSM) 
applyAutopilotUpdate(buf []byte, index uint64) interface{} { 1100 var req structs.AutopilotSetConfigRequest 1101 if err := structs.Decode(buf, &req); err != nil { 1102 panic(fmt.Errorf("failed to decode request: %v", err)) 1103 } 1104 defer metrics.MeasureSince([]string{"nomad", "fsm", "autopilot"}, time.Now()) 1105 1106 if req.CAS { 1107 act, err := n.state.AutopilotCASConfig(index, req.Config.ModifyIndex, &req.Config) 1108 if err != nil { 1109 return err 1110 } 1111 return act 1112 } 1113 return n.state.AutopilotSetConfig(index, &req.Config) 1114 } 1115 1116 func (n *nomadFSM) applySchedulerConfigUpdate(buf []byte, index uint64) interface{} { 1117 var req structs.SchedulerSetConfigRequest 1118 if err := structs.Decode(buf, &req); err != nil { 1119 panic(fmt.Errorf("failed to decode request: %v", err)) 1120 } 1121 defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_scheduler_config"}, time.Now()) 1122 1123 req.Config.Canonicalize() 1124 1125 if req.CAS { 1126 applied, err := n.state.SchedulerCASConfig(index, req.Config.ModifyIndex, &req.Config) 1127 if err != nil { 1128 return err 1129 } 1130 return applied 1131 } 1132 return n.state.SchedulerSetConfig(index, &req.Config) 1133 } 1134 1135 func (n *nomadFSM) applyCSIVolumeRegister(buf []byte, index uint64) interface{} { 1136 var req structs.CSIVolumeRegisterRequest 1137 if err := structs.Decode(buf, &req); err != nil { 1138 panic(fmt.Errorf("failed to decode request: %v", err)) 1139 } 1140 defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_csi_volume_register"}, time.Now()) 1141 1142 if err := n.state.CSIVolumeRegister(index, req.Volumes); err != nil { 1143 n.logger.Error("CSIVolumeRegister failed", "error", err) 1144 return err 1145 } 1146 1147 return nil 1148 } 1149 1150 func (n *nomadFSM) applyCSIVolumeDeregister(buf []byte, index uint64) interface{} { 1151 var req structs.CSIVolumeDeregisterRequest 1152 if err := structs.Decode(buf, &req); err != nil { 1153 panic(fmt.Errorf("failed to decode 
request: %v", err)) 1154 } 1155 defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_csi_volume_deregister"}, time.Now()) 1156 1157 if err := n.state.CSIVolumeDeregister(index, req.RequestNamespace(), req.VolumeIDs); err != nil { 1158 n.logger.Error("CSIVolumeDeregister failed", "error", err) 1159 return err 1160 } 1161 1162 return nil 1163 } 1164 1165 func (n *nomadFSM) applyCSIVolumeBatchClaim(buf []byte, index uint64) interface{} { 1166 var batch *structs.CSIVolumeClaimBatchRequest 1167 if err := structs.Decode(buf, &batch); err != nil { 1168 panic(fmt.Errorf("failed to decode request: %v", err)) 1169 } 1170 defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_csi_volume_batch_claim"}, time.Now()) 1171 1172 for _, req := range batch.Claims { 1173 err := n.state.CSIVolumeClaim(index, req.RequestNamespace(), 1174 req.VolumeID, req.ToClaim()) 1175 if err != nil { 1176 n.logger.Error("CSIVolumeClaim for batch failed", "error", err) 1177 return err // note: fails the remaining batch 1178 } 1179 } 1180 return nil 1181 } 1182 1183 func (n *nomadFSM) applyCSIVolumeClaim(buf []byte, index uint64) interface{} { 1184 var req structs.CSIVolumeClaimRequest 1185 if err := structs.Decode(buf, &req); err != nil { 1186 panic(fmt.Errorf("failed to decode request: %v", err)) 1187 } 1188 defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_csi_volume_claim"}, time.Now()) 1189 1190 if err := n.state.CSIVolumeClaim(index, req.RequestNamespace(), req.VolumeID, req.ToClaim()); err != nil { 1191 n.logger.Error("CSIVolumeClaim failed", "error", err) 1192 return err 1193 } 1194 return nil 1195 } 1196 1197 func (n *nomadFSM) applyCSIPluginDelete(buf []byte, index uint64) interface{} { 1198 var req structs.CSIPluginDeleteRequest 1199 if err := structs.Decode(buf, &req); err != nil { 1200 panic(fmt.Errorf("failed to decode request: %v", err)) 1201 } 1202 defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_csi_plugin_delete"}, time.Now()) 1203 1204 if err := 
n.state.DeleteCSIPlugin(index, req.ID); err != nil { 1205 // "plugin in use" is an error for the state store but not for typical 1206 // callers, so reduce log noise by not logging that case here 1207 if err.Error() != "plugin in use" { 1208 n.logger.Error("DeleteCSIPlugin failed", "error", err) 1209 } 1210 return err 1211 } 1212 return nil 1213 } 1214 1215 func (n *nomadFSM) Snapshot() (raft.FSMSnapshot, error) { 1216 // Create a new snapshot 1217 snap, err := n.state.Snapshot() 1218 if err != nil { 1219 return nil, err 1220 } 1221 1222 ns := &nomadSnapshot{ 1223 snap: snap, 1224 timetable: n.timetable, 1225 } 1226 return ns, nil 1227 } 1228 1229 func (n *nomadFSM) Restore(old io.ReadCloser) error { 1230 defer old.Close() 1231 1232 // Create a new state store 1233 config := &state.StateStoreConfig{ 1234 Logger: n.config.Logger, 1235 Region: n.config.Region, 1236 } 1237 newState, err := state.NewStateStore(config) 1238 if err != nil { 1239 return err 1240 } 1241 1242 // Start the state restore 1243 restore, err := newState.Restore() 1244 if err != nil { 1245 return err 1246 } 1247 defer restore.Abort() 1248 1249 // Create a decoder 1250 dec := codec.NewDecoder(old, structs.MsgpackHandle) 1251 1252 // Read in the header 1253 var header snapshotHeader 1254 if err := dec.Decode(&header); err != nil { 1255 return err 1256 } 1257 1258 // Populate the new state 1259 msgType := make([]byte, 1) 1260 for { 1261 // Read the message type 1262 _, err := old.Read(msgType) 1263 if err == io.EOF { 1264 break 1265 } else if err != nil { 1266 return err 1267 } 1268 1269 // Decode 1270 snapType := SnapshotType(msgType[0]) 1271 switch snapType { 1272 case TimeTableSnapshot: 1273 if err := n.timetable.Deserialize(dec); err != nil { 1274 return fmt.Errorf("time table deserialize failed: %v", err) 1275 } 1276 1277 case NodeSnapshot: 1278 node := new(structs.Node) 1279 if err := dec.Decode(node); err != nil { 1280 return err 1281 } 1282 1283 // Handle upgrade paths 1284 
node.Canonicalize() 1285 1286 if err := restore.NodeRestore(node); err != nil { 1287 return err 1288 } 1289 1290 case JobSnapshot: 1291 job := new(structs.Job) 1292 if err := dec.Decode(job); err != nil { 1293 return err 1294 } 1295 1296 /* Handle upgrade paths: 1297 * - Empty maps and slices should be treated as nil to avoid 1298 * un-intended destructive updates in scheduler since we use 1299 * reflect.DeepEqual. Starting Nomad 0.4.1, job submission sanitizes 1300 * the incoming job. 1301 * - Migrate from old style upgrade stanza that used only a stagger. 1302 */ 1303 job.Canonicalize() 1304 1305 if err := restore.JobRestore(job); err != nil { 1306 return err 1307 } 1308 1309 case EvalSnapshot: 1310 eval := new(structs.Evaluation) 1311 if err := dec.Decode(eval); err != nil { 1312 return err 1313 } 1314 1315 if err := restore.EvalRestore(eval); err != nil { 1316 return err 1317 } 1318 1319 case AllocSnapshot: 1320 alloc := new(structs.Allocation) 1321 if err := dec.Decode(alloc); err != nil { 1322 return err 1323 } 1324 1325 // Handle upgrade path 1326 alloc.Canonicalize() 1327 1328 if err := restore.AllocRestore(alloc); err != nil { 1329 return err 1330 } 1331 1332 case IndexSnapshot: 1333 idx := new(state.IndexEntry) 1334 if err := dec.Decode(idx); err != nil { 1335 return err 1336 } 1337 if err := restore.IndexRestore(idx); err != nil { 1338 return err 1339 } 1340 1341 case PeriodicLaunchSnapshot: 1342 launch := new(structs.PeriodicLaunch) 1343 if err := dec.Decode(launch); err != nil { 1344 return err 1345 } 1346 1347 if err := restore.PeriodicLaunchRestore(launch); err != nil { 1348 return err 1349 } 1350 1351 case JobSummarySnapshot: 1352 summary := new(structs.JobSummary) 1353 if err := dec.Decode(summary); err != nil { 1354 return err 1355 } 1356 1357 if err := restore.JobSummaryRestore(summary); err != nil { 1358 return err 1359 } 1360 1361 case VaultAccessorSnapshot: 1362 accessor := new(structs.VaultAccessor) 1363 if err := dec.Decode(accessor); err != 
nil { 1364 return err 1365 } 1366 if err := restore.VaultAccessorRestore(accessor); err != nil { 1367 return err 1368 } 1369 1370 case ServiceIdentityTokenAccessorSnapshot: 1371 accessor := new(structs.SITokenAccessor) 1372 if err := dec.Decode(accessor); err != nil { 1373 return err 1374 } 1375 if err := restore.SITokenAccessorRestore(accessor); err != nil { 1376 return err 1377 } 1378 1379 case JobVersionSnapshot: 1380 version := new(structs.Job) 1381 if err := dec.Decode(version); err != nil { 1382 return err 1383 } 1384 1385 if err := restore.JobVersionRestore(version); err != nil { 1386 return err 1387 } 1388 1389 case DeploymentSnapshot: 1390 deployment := new(structs.Deployment) 1391 if err := dec.Decode(deployment); err != nil { 1392 return err 1393 } 1394 1395 if err := restore.DeploymentRestore(deployment); err != nil { 1396 return err 1397 } 1398 1399 case ACLPolicySnapshot: 1400 policy := new(structs.ACLPolicy) 1401 if err := dec.Decode(policy); err != nil { 1402 return err 1403 } 1404 if err := restore.ACLPolicyRestore(policy); err != nil { 1405 return err 1406 } 1407 1408 case ACLTokenSnapshot: 1409 token := new(structs.ACLToken) 1410 if err := dec.Decode(token); err != nil { 1411 return err 1412 } 1413 if err := restore.ACLTokenRestore(token); err != nil { 1414 return err 1415 } 1416 1417 case SchedulerConfigSnapshot: 1418 schedConfig := new(structs.SchedulerConfiguration) 1419 if err := dec.Decode(schedConfig); err != nil { 1420 return err 1421 } 1422 schedConfig.Canonicalize() 1423 if err := restore.SchedulerConfigRestore(schedConfig); err != nil { 1424 return err 1425 } 1426 1427 case ClusterMetadataSnapshot: 1428 meta := new(structs.ClusterMetadata) 1429 if err := dec.Decode(meta); err != nil { 1430 return err 1431 } 1432 if err := restore.ClusterMetadataRestore(meta); err != nil { 1433 return err 1434 } 1435 1436 case ScalingEventsSnapshot: 1437 jobScalingEvents := new(structs.JobScalingEvents) 1438 if err := dec.Decode(jobScalingEvents); err != 
nil { 1439 return err 1440 } 1441 1442 if err := restore.ScalingEventsRestore(jobScalingEvents); err != nil { 1443 return err 1444 } 1445 1446 case ScalingPolicySnapshot: 1447 scalingPolicy := new(structs.ScalingPolicy) 1448 if err := dec.Decode(scalingPolicy); err != nil { 1449 return err 1450 } 1451 1452 if err := restore.ScalingPolicyRestore(scalingPolicy); err != nil { 1453 return err 1454 } 1455 1456 case CSIPluginSnapshot: 1457 plugin := new(structs.CSIPlugin) 1458 if err := dec.Decode(plugin); err != nil { 1459 return err 1460 } 1461 1462 if err := restore.CSIPluginRestore(plugin); err != nil { 1463 return err 1464 } 1465 1466 case CSIVolumeSnapshot: 1467 plugin := new(structs.CSIVolume) 1468 if err := dec.Decode(plugin); err != nil { 1469 return err 1470 } 1471 1472 if err := restore.CSIVolumeRestore(plugin); err != nil { 1473 return err 1474 } 1475 1476 default: 1477 // Check if this is an enterprise only object being restored 1478 restorer, ok := n.enterpriseRestorers[snapType] 1479 if !ok { 1480 return fmt.Errorf("Unrecognized snapshot type: %v", msgType) 1481 } 1482 1483 // Restore the enterprise only object 1484 if err := restorer(restore, dec); err != nil { 1485 return err 1486 } 1487 } 1488 } 1489 1490 restore.Commit() 1491 1492 // COMPAT Remove in 0.10 1493 // Clean up active deployments that do not have a job 1494 if err := n.failLeakedDeployments(newState); err != nil { 1495 return err 1496 } 1497 1498 // External code might be calling State(), so we need to synchronize 1499 // here to make sure we swap in the new state store atomically. 1500 n.stateLock.Lock() 1501 stateOld := n.state 1502 n.state = newState 1503 n.stateLock.Unlock() 1504 1505 // Signal that the old state store has been abandoned. This is required 1506 // because we don't operate on it any more, we just throw it away, so 1507 // blocking queries won't see any changes and need to be woken up. 
1508 stateOld.Abandon() 1509 1510 return nil 1511 } 1512 1513 // failLeakedDeployments is used to fail deployments that do not have a job. 1514 // This state is a broken invariant that should not occur since 0.8.X. 1515 func (n *nomadFSM) failLeakedDeployments(state *state.StateStore) error { 1516 // Scan for deployments that are referencing a job that no longer exists. 1517 // This could happen if multiple deployments were created for a given job 1518 // and thus the older deployment leaks and then the job is removed. 1519 iter, err := state.Deployments(nil) 1520 if err != nil { 1521 return fmt.Errorf("failed to query deployments: %v", err) 1522 } 1523 1524 dindex, err := state.Index("deployment") 1525 if err != nil { 1526 return fmt.Errorf("couldn't fetch index of deployments table: %v", err) 1527 } 1528 1529 for { 1530 raw := iter.Next() 1531 if raw == nil { 1532 break 1533 } 1534 1535 d := raw.(*structs.Deployment) 1536 1537 // We are only looking for active deployments where the job no longer 1538 // exists 1539 if !d.Active() { 1540 continue 1541 } 1542 1543 // Find the job 1544 job, err := state.JobByID(nil, d.Namespace, d.JobID) 1545 if err != nil { 1546 return fmt.Errorf("failed to lookup job %s from deployment %q: %v", d.JobID, d.ID, err) 1547 } 1548 1549 // Job exists. 
1550 if job != nil { 1551 continue 1552 } 1553 1554 // Update the deployment to be terminal 1555 failed := d.Copy() 1556 failed.Status = structs.DeploymentStatusCancelled 1557 failed.StatusDescription = structs.DeploymentStatusDescriptionStoppedJob 1558 if err := state.UpsertDeployment(dindex, failed); err != nil { 1559 return fmt.Errorf("failed to mark leaked deployment %q as failed: %v", failed.ID, err) 1560 } 1561 } 1562 1563 return nil 1564 } 1565 1566 // reconcileQueuedAllocations re-calculates the queued allocations for every job that we 1567 // created a Job Summary during the snap shot restore 1568 func (n *nomadFSM) reconcileQueuedAllocations(index uint64) error { 1569 // Get all the jobs 1570 ws := memdb.NewWatchSet() 1571 iter, err := n.state.Jobs(ws) 1572 if err != nil { 1573 return err 1574 } 1575 1576 snap, err := n.state.Snapshot() 1577 if err != nil { 1578 return fmt.Errorf("unable to create snapshot: %v", err) 1579 } 1580 1581 // Invoking the scheduler for every job so that we can populate the number 1582 // of queued allocations for every job 1583 for { 1584 rawJob := iter.Next() 1585 if rawJob == nil { 1586 break 1587 } 1588 job := rawJob.(*structs.Job) 1589 1590 // Nothing to do for queued allocations if the job is a parent periodic/parameterized job 1591 if job.IsParameterized() || job.IsPeriodic() { 1592 continue 1593 } 1594 planner := &scheduler.Harness{ 1595 State: &snap.StateStore, 1596 } 1597 // Create an eval and mark it as requiring annotations and insert that as well 1598 eval := &structs.Evaluation{ 1599 ID: uuid.Generate(), 1600 Namespace: job.Namespace, 1601 Priority: job.Priority, 1602 Type: job.Type, 1603 TriggeredBy: structs.EvalTriggerJobRegister, 1604 JobID: job.ID, 1605 JobModifyIndex: job.JobModifyIndex + 1, 1606 Status: structs.EvalStatusPending, 1607 AnnotatePlan: true, 1608 } 1609 snap.UpsertEvals(100, []*structs.Evaluation{eval}) 1610 // Create the scheduler and run it 1611 sched, err := scheduler.NewScheduler(eval.Type, 
n.logger, snap, planner) 1612 if err != nil { 1613 return err 1614 } 1615 1616 if err := sched.Process(eval); err != nil { 1617 return err 1618 } 1619 1620 // Get the job summary from the fsm state store 1621 originalSummary, err := n.state.JobSummaryByID(ws, job.Namespace, job.ID) 1622 if err != nil { 1623 return err 1624 } 1625 summary := originalSummary.Copy() 1626 1627 // Add the allocations scheduler has made to queued since these 1628 // allocations are never getting placed until the scheduler is invoked 1629 // with a real planner 1630 if l := len(planner.Plans); l != 1 { 1631 return fmt.Errorf("unexpected number of plans during restore %d. Please file an issue including the logs", l) 1632 } 1633 for _, allocations := range planner.Plans[0].NodeAllocation { 1634 for _, allocation := range allocations { 1635 tgSummary, ok := summary.Summary[allocation.TaskGroup] 1636 if !ok { 1637 return fmt.Errorf("task group %q not found while updating queued count", allocation.TaskGroup) 1638 } 1639 tgSummary.Queued += 1 1640 summary.Summary[allocation.TaskGroup] = tgSummary 1641 } 1642 } 1643 1644 // Add the queued allocations attached to the evaluation to the queued 1645 // counter of the job summary 1646 if l := len(planner.Evals); l != 1 { 1647 return fmt.Errorf("unexpected number of evals during restore %d. Please file an issue including the logs", l) 1648 } 1649 for tg, queued := range planner.Evals[0].QueuedAllocations { 1650 tgSummary, ok := summary.Summary[tg] 1651 if !ok { 1652 return fmt.Errorf("task group %q not found while updating queued count", tg) 1653 } 1654 1655 // We add instead of setting here because we want to take into 1656 // consideration what the scheduler with a mock planner thinks it 1657 // placed. 
Those should be counted as queued as well 1658 tgSummary.Queued += queued 1659 summary.Summary[tg] = tgSummary 1660 } 1661 1662 if !reflect.DeepEqual(summary, originalSummary) { 1663 summary.ModifyIndex = index 1664 if err := n.state.UpsertJobSummary(index, summary); err != nil { 1665 return err 1666 } 1667 } 1668 } 1669 return nil 1670 } 1671 1672 func (n *nomadFSM) applyUpsertScalingEvent(buf []byte, index uint64) interface{} { 1673 defer metrics.MeasureSince([]string{"nomad", "fsm", "upsert_scaling_event"}, time.Now()) 1674 var req structs.ScalingEventRequest 1675 if err := structs.Decode(buf, &req); err != nil { 1676 panic(fmt.Errorf("failed to decode request: %v", err)) 1677 } 1678 1679 if err := n.state.UpsertScalingEvent(index, &req); err != nil { 1680 n.logger.Error("UpsertScalingEvent failed", "error", err) 1681 return err 1682 } 1683 1684 return nil 1685 } 1686 1687 func (s *nomadSnapshot) Persist(sink raft.SnapshotSink) error { 1688 defer metrics.MeasureSince([]string{"nomad", "fsm", "persist"}, time.Now()) 1689 // Register the nodes 1690 encoder := codec.NewEncoder(sink, structs.MsgpackHandle) 1691 1692 // Write the header 1693 header := snapshotHeader{} 1694 if err := encoder.Encode(&header); err != nil { 1695 sink.Cancel() 1696 return err 1697 } 1698 1699 // Write the time table 1700 sink.Write([]byte{byte(TimeTableSnapshot)}) 1701 if err := s.timetable.Serialize(encoder); err != nil { 1702 sink.Cancel() 1703 return err 1704 } 1705 1706 // Write all the data out 1707 if err := s.persistIndexes(sink, encoder); err != nil { 1708 sink.Cancel() 1709 return err 1710 } 1711 if err := s.persistNodes(sink, encoder); err != nil { 1712 sink.Cancel() 1713 return err 1714 } 1715 if err := s.persistJobs(sink, encoder); err != nil { 1716 sink.Cancel() 1717 return err 1718 } 1719 if err := s.persistEvals(sink, encoder); err != nil { 1720 sink.Cancel() 1721 return err 1722 } 1723 if err := s.persistAllocs(sink, encoder); err != nil { 1724 sink.Cancel() 1725 return 
err
	}
	if err := s.persistPeriodicLaunches(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistJobSummaries(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistVaultAccessors(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistSITokenAccessors(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistJobVersions(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistDeployments(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistScalingPolicies(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistScalingEvents(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistCSIPlugins(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistCSIVolumes(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistACLPolicies(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistACLTokens(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistEnterpriseTables(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistSchedulerConfig(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistClusterMetadata(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	return nil
}

// persistIndexes writes every table index entry from the state snapshot to
// the sink, prefixing each record with the IndexSnapshot type byte so the
// restore path can dispatch it.
//
// NOTE(review): the sink.Write error is ignored here and throughout the
// persist helpers below; the subsequent encoder.Encode over the same sink
// is relied on to surface sink failures — confirm this is intentional.
func (s *nomadSnapshot) persistIndexes(sink raft.SnapshotSink,
	encoder *codec.Encoder) error {
	// Get all the indexes
	iter, err := s.snap.Indexes()
	if err != nil {
		return err
	}

	for {
		// Get the next item
		raw := iter.Next()
		if raw == nil {
			break
		}

		// Prepare the request struct
		idx := raw.(*state.IndexEntry)

		// Write out the index entry
		sink.Write([]byte{byte(IndexSnapshot)})
		if err := encoder.Encode(idx); err != nil {
			return err
		}
	}
	return nil
}

// persistNodes writes every node from the state snapshot to the sink,
// prefixing each record with the NodeSnapshot type byte.
func (s *nomadSnapshot) persistNodes(sink raft.SnapshotSink,
	encoder *codec.Encoder) error {
	// Get all the nodes
	ws := memdb.NewWatchSet()
	nodes, err := s.snap.Nodes(ws)
	if err != nil {
		return err
	}

	for {
		// Get the next item
		raw := nodes.Next()
		if raw == nil {
			break
		}

		// Prepare the request struct
		node := raw.(*structs.Node)

		// Write out a node registration
		sink.Write([]byte{byte(NodeSnapshot)})
		if err := encoder.Encode(node); err != nil {
			return err
		}
	}
	return nil
}

// persistJobs writes every job from the state snapshot to the sink,
// prefixing each record with the JobSnapshot type byte.
func (s *nomadSnapshot) persistJobs(sink raft.SnapshotSink,
	encoder *codec.Encoder) error {
	// Get all the jobs
	ws := memdb.NewWatchSet()
	jobs, err := s.snap.Jobs(ws)
	if err != nil {
		return err
	}

	for {
		// Get the next item
		raw := jobs.Next()
		if raw == nil {
			break
		}

		// Prepare the request struct
		job := raw.(*structs.Job)

		// Write out a job registration
		sink.Write([]byte{byte(JobSnapshot)})
		if err := encoder.Encode(job); err != nil {
			return err
		}
	}
	return nil
}

// persistEvals writes every evaluation from the state snapshot to the sink,
// prefixing each record with the EvalSnapshot type byte.
func (s *nomadSnapshot) persistEvals(sink raft.SnapshotSink,
	encoder *codec.Encoder) error {
	// Get all the evaluations
	ws := memdb.NewWatchSet()
	evals, err := s.snap.Evals(ws)
	if err != nil {
		return err
	}

	for {
		// Get the next item
		raw := evals.Next()
		if raw == nil {
			break
		}

		// Prepare the request struct
		eval := raw.(*structs.Evaluation)

		// Write out the evaluation
		sink.Write([]byte{byte(EvalSnapshot)})
		if err := encoder.Encode(eval); err != nil {
			return err
		}
	}
	return nil
}

// persistAllocs writes every allocation from the state snapshot to the
// sink, prefixing each record with the AllocSnapshot type byte.
func (s *nomadSnapshot) persistAllocs(sink raft.SnapshotSink,
	encoder *codec.Encoder) error {
	// Get all the allocations
	ws := memdb.NewWatchSet()
	allocs, err := s.snap.Allocs(ws)
	if err != nil {
		return err
	}

	for {
		// Get the next item
		raw := allocs.Next()
		if raw == nil {
			break
		}

		// Prepare the request struct
		alloc := raw.(*structs.Allocation)

		// Write out the allocation
		sink.Write([]byte{byte(AllocSnapshot)})
		if err := encoder.Encode(alloc); err != nil {
			return err
		}
	}
	return nil
}

// persistPeriodicLaunches writes every periodic launch record from the
// state snapshot to the sink, prefixing each record with the
// PeriodicLaunchSnapshot type byte.
func (s *nomadSnapshot) persistPeriodicLaunches(sink raft.SnapshotSink,
	encoder *codec.Encoder) error {
	// Get all the periodic launches
	ws := memdb.NewWatchSet()
	launches, err := s.snap.PeriodicLaunches(ws)
	if err != nil {
		return err
	}

	for {
		// Get the next item
		raw := launches.Next()
		if raw == nil {
			break
		}

		// Prepare the request struct
		launch := raw.(*structs.PeriodicLaunch)

		// Write out the periodic launch
		sink.Write([]byte{byte(PeriodicLaunchSnapshot)})
		if err := encoder.Encode(launch); err != nil {
			return err
		}
	}
	return nil
}

// persistJobSummaries writes every job summary from the state snapshot to
// the sink, prefixing each record with the JobSummarySnapshot type byte.
func (s *nomadSnapshot) persistJobSummaries(sink raft.SnapshotSink,
	encoder *codec.Encoder) error {

	ws := memdb.NewWatchSet()
	summaries, err := s.snap.JobSummaries(ws)
	if err != nil {
		return err
	}

	for {
		raw := summaries.Next()
		if raw == nil {
			break
		}

		jobSummary := raw.(*structs.JobSummary)

		sink.Write([]byte{byte(JobSummarySnapshot)})
		if err := encoder.Encode(jobSummary); err != nil {
			return err
		}
	}
	return nil
}

// persistVaultAccessors writes every Vault accessor from the state snapshot
// to the sink, prefixing each record with the VaultAccessorSnapshot type
// byte.
func (s *nomadSnapshot) persistVaultAccessors(sink raft.SnapshotSink,
	encoder *codec.Encoder) error {

	ws := memdb.NewWatchSet()
	accessors, err := s.snap.VaultAccessors(ws)
	if err != nil {
		return err
	}

	for {
		raw := accessors.Next()
		if raw == nil {
			break
		}

		accessor := raw.(*structs.VaultAccessor)

		sink.Write([]byte{byte(VaultAccessorSnapshot)})
		if err := encoder.Encode(accessor); err != nil {
			return err
		}
	}
	return nil
}

// persistSITokenAccessors writes every service identity token accessor from
// the state snapshot to the sink, prefixing each record with the
// ServiceIdentityTokenAccessorSnapshot type byte.
func (s *nomadSnapshot) persistSITokenAccessors(sink raft.SnapshotSink, encoder *codec.Encoder) error {
	ws := memdb.NewWatchSet()
	accessors, err := s.snap.SITokenAccessors(ws)
	if err != nil {
		return err
	}

	for raw := accessors.Next(); raw != nil; raw = accessors.Next() {
		accessor := raw.(*structs.SITokenAccessor)
		sink.Write([]byte{byte(ServiceIdentityTokenAccessorSnapshot)})
		if err := encoder.Encode(accessor); err != nil {
			return err
		}
	}
	return nil
}

// persistJobVersions writes every historic job version from the state
// snapshot to the sink, prefixing each record with the JobVersionSnapshot
// type byte.
func (s *nomadSnapshot) persistJobVersions(sink raft.SnapshotSink,
	encoder *codec.Encoder) error {
	// Get all the job versions
	ws := memdb.NewWatchSet()
	versions, err := s.snap.JobVersions(ws)
	if err != nil {
		return err
	}

	for {
		// Get the next item
		raw := versions.Next()
		if raw == nil {
			break
		}

		// Prepare the request struct
		job := raw.(*structs.Job)

		// Write out the job version
		sink.Write([]byte{byte(JobVersionSnapshot)})
		if err := encoder.Encode(job); err != nil {
			return err
		}
	}
	return nil
}

// persistDeployments writes every deployment from the state snapshot to the
// sink, prefixing each record with the DeploymentSnapshot type byte.
func (s *nomadSnapshot) persistDeployments(sink raft.SnapshotSink,
	encoder *codec.Encoder) error {
	// Get all the deployments
	ws := memdb.NewWatchSet()
	deployments, err := s.snap.Deployments(ws)
	if err != nil {
		return err
	}

	for {
		// Get the next item
		raw := deployments.Next()
		if raw == nil {
			break
		}

		// Prepare the request struct
		deployment := raw.(*structs.Deployment)

		// Write out the deployment
		sink.Write([]byte{byte(DeploymentSnapshot)})
		if err := encoder.Encode(deployment); err != nil {
			return err
		}
	}
	return nil
}

// persistACLPolicies writes every ACL policy from the state snapshot to the
// sink, prefixing each record with the ACLPolicySnapshot type byte.
func (s *nomadSnapshot) persistACLPolicies(sink raft.SnapshotSink,
	encoder *codec.Encoder) error {
	// Get all the policies
	ws := memdb.NewWatchSet()
	policies, err := s.snap.ACLPolicies(ws)
	if err != nil {
		return err
	}

	for {
		// Get the next item
		raw := policies.Next()
		if raw == nil {
			break
		}

		// Prepare the request struct
		policy := raw.(*structs.ACLPolicy)

		// Write out a policy registration
		sink.Write([]byte{byte(ACLPolicySnapshot)})
		if err := encoder.Encode(policy); err != nil {
			return err
		}
	}
	return nil
}

// persistACLTokens writes every ACL token from the state snapshot to the
// sink, prefixing each record with the ACLTokenSnapshot type byte.
func (s *nomadSnapshot) persistACLTokens(sink raft.SnapshotSink,
	encoder *codec.Encoder) error {
	// Get all the tokens
	ws := memdb.NewWatchSet()
	tokens, err := s.snap.ACLTokens(ws)
	if err != nil {
		return err
	}

	for {
		// Get the next item
		raw := tokens.Next()
		if raw == nil {
			break
		}

		// Prepare the request struct
		token := raw.(*structs.ACLToken)

		// Write out a token registration
		sink.Write([]byte{byte(ACLTokenSnapshot)})
		if err := encoder.Encode(token); err != nil {
			return err
		}
	}
	return nil
}

// persistSchedulerConfig writes the scheduler configuration (if one is set)
// to the sink, prefixed with the SchedulerConfigSnapshot type byte. A nil
// configuration is skipped without error.
func (s *nomadSnapshot) persistSchedulerConfig(sink raft.SnapshotSink,
	encoder *codec.Encoder) error {
	// Get scheduler config
	_, schedConfig, err := s.snap.SchedulerConfig()
	if err != nil {
		return err
	}
	if schedConfig == nil {
		return nil
	}
	// Write out scheduler config
	sink.Write([]byte{byte(SchedulerConfigSnapshot)})
	if err := encoder.Encode(schedConfig); err != nil {
		return err
	}
	return nil
}

// persistClusterMetadata writes the cluster metadata (if set) to the sink,
// prefixed with the ClusterMetadataSnapshot type byte. Nil metadata is
// skipped without error.
func (s *nomadSnapshot) persistClusterMetadata(sink raft.SnapshotSink,
	encoder *codec.Encoder) error {

	// Get the cluster metadata
	clusterMetadata, err := s.snap.ClusterMetadata()
	if err != nil {
		return err
	}
	if clusterMetadata == nil {
		return nil
	}

	// Write out the cluster metadata
	sink.Write([]byte{byte(ClusterMetadataSnapshot)})
	if err := encoder.Encode(clusterMetadata); err != nil {
		return err
	}

	return nil
}

// persistScalingPolicies writes every scaling policy from the state
// snapshot to the sink, prefixing each record with the ScalingPolicySnapshot
// type byte.
func (s *nomadSnapshot) persistScalingPolicies(sink raft.SnapshotSink,
	encoder *codec.Encoder) error {

	// Get all the scaling policies
	ws := memdb.NewWatchSet()
	scalingPolicies, err := s.snap.ScalingPolicies(ws)
	if err != nil {
		return err
	}

	for {
		// Get the next item
		raw := scalingPolicies.Next()
		if raw == nil {
			break
		}

		// Prepare the request struct
		scalingPolicy := raw.(*structs.ScalingPolicy)

		// Write out a scaling policy snapshot
		sink.Write([]byte{byte(ScalingPolicySnapshot)})
		if err := encoder.Encode(scalingPolicy); err != nil {
			return err
		}
	}
	return nil
}

// persistScalingEvents writes every job's scaling events from the state
// snapshot to the sink, prefixing each record with the ScalingEventsSnapshot
// type byte.
func (s *nomadSnapshot) persistScalingEvents(sink raft.SnapshotSink, encoder *codec.Encoder) error {
	// Get all the scaling events
	ws := memdb.NewWatchSet()
	iter, err := s.snap.ScalingEvents(ws)
	if err != nil {
		return err
	}

	for {
		// Get the next item
		raw := iter.Next()
		if raw == nil {
			break
		}

		// Prepare the request struct
		events := raw.(*structs.JobScalingEvents)

		// Write out a scaling events snapshot
		sink.Write([]byte{byte(ScalingEventsSnapshot)})
		if err := encoder.Encode(events); err != nil {
			return err
		}
	}
	return nil
}

2231 func (s *nomadSnapshot) persistCSIPlugins(sink raft.SnapshotSink, 2232 encoder *codec.Encoder) error { 2233 2234 // Get all the CSI plugins 2235 ws := memdb.NewWatchSet() 2236 plugins, err := s.snap.CSIPlugins(ws) 2237 if err != nil { 2238 return err 2239 } 2240 2241 for { 2242 // Get the next item 2243 raw := plugins.Next() 2244 if raw == nil { 2245 break 2246 } 2247 2248 // Prepare the request struct 2249 plugin := raw.(*structs.CSIPlugin) 2250 2251 // Write out a plugin snapshot 2252 sink.Write([]byte{byte(CSIPluginSnapshot)}) 2253 if err := encoder.Encode(plugin); err != nil { 2254 return err 2255 } 2256 } 2257 return nil 2258 } 2259 2260 func (s *nomadSnapshot) persistCSIVolumes(sink raft.SnapshotSink, 2261 encoder *codec.Encoder) error { 2262 2263 // Get all the CSI volumes 2264 ws := memdb.NewWatchSet() 2265 volumes, err := s.snap.CSIVolumes(ws) 2266 if err != nil { 2267 return err 2268 } 2269 2270 for { 2271 // Get the next item 2272 raw := volumes.Next() 2273 if raw == nil { 2274 break 2275 } 2276 2277 // Prepare the request struct 2278 volume := raw.(*structs.CSIVolume) 2279 2280 // Write out a volume snapshot 2281 sink.Write([]byte{byte(CSIVolumeSnapshot)}) 2282 if err := encoder.Encode(volume); err != nil { 2283 return err 2284 } 2285 } 2286 return nil 2287 } 2288 2289 // Release is a no-op, as we just need to GC the pointer 2290 // to the state store snapshot. There is nothing to explicitly 2291 // cleanup. 2292 func (s *nomadSnapshot) Release() {}