github.com/hspak/nomad@v0.7.2-0.20180309000617-bc4ae22a39a5/nomad/fsm.go

package nomad

import (
	"fmt"
	"io"
	"log"
	"reflect"
	"sync"
	"time"

	"github.com/armon/go-metrics"
	memdb "github.com/hashicorp/go-memdb"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/state"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/scheduler"
	"github.com/hashicorp/raft"
	"github.com/ugorji/go/codec"
)

const (
	// timeTableGranularity is the granularity of index to time tracking
	timeTableGranularity = 5 * time.Minute

	// timeTableLimit is the maximum limit of our tracking
	timeTableLimit = 72 * time.Hour
)

// SnapshotType is prefixed to a record in the FSM snapshot
// so that we can determine the type for restore
type SnapshotType byte

const (
	NodeSnapshot SnapshotType = iota
	JobSnapshot
	IndexSnapshot
	EvalSnapshot
	AllocSnapshot
	TimeTableSnapshot
	PeriodicLaunchSnapshot
	JobSummarySnapshot
	VaultAccessorSnapshot
	JobVersionSnapshot
	DeploymentSnapshot
	ACLPolicySnapshot
	ACLTokenSnapshot
)

// LogApplier is the definition of a function that can apply a Raft log
type LogApplier func(buf []byte, index uint64) interface{}

// LogAppliers is a mapping of the Raft MessageType to the appropriate log
// applier
type LogAppliers map[structs.MessageType]LogApplier

// SnapshotRestorer is the definition of a function that can restore a
// snapshot record
type SnapshotRestorer func(restore *state.StateRestore, dec *codec.Decoder) error

// SnapshotRestorers is a mapping of the SnapshotType to the appropriate
// snapshot restorer.
type SnapshotRestorers map[SnapshotType]SnapshotRestorer

// nomadFSM implements a finite state machine that is used
// along with Raft to provide strong consistency. We implement
// this outside the Server to avoid exposing this outside the package.
type nomadFSM struct {
	evalBroker         *EvalBroker
	blockedEvals       *BlockedEvals
	periodicDispatcher *PeriodicDispatch
	logger             *log.Logger
	state              *state.StateStore
	timetable          *TimeTable

	// config is the FSM config
	config *FSMConfig

	// enterpriseAppliers holds the set of enterprise only LogAppliers
	enterpriseAppliers LogAppliers

	// enterpriseRestorers holds the set of enterprise only snapshot restorers
	enterpriseRestorers SnapshotRestorers

	// stateLock is only used to protect outside callers to State() from
	// racing with Restore(), which is called by Raft (it puts in a totally
	// new state store). Everything internal here is synchronized by the
	// Raft side, so doesn't need to lock this.
	stateLock sync.RWMutex
}

// nomadSnapshot is used to provide a snapshot of the current
// state in a way that can be accessed concurrently with operations
// that may modify the live state.
type nomadSnapshot struct {
	snap      *state.StateSnapshot
	timetable *TimeTable
}

// snapshotHeader is the first entry in our snapshot
type snapshotHeader struct {
}

// FSMConfig is used to configure the FSM
type FSMConfig struct {
	// EvalBroker is the evaluation broker evaluations should be added to
	EvalBroker *EvalBroker

	// Periodic is the periodic job dispatcher that periodic jobs should be
	// added/removed from
	Periodic *PeriodicDispatch

	// BlockedEvals is the blocked eval tracker that blocked evaluations should
	// be added to.
	Blocked *BlockedEvals

	// LogOutput is the writer logs should be written to
	LogOutput io.Writer

	// Region is the region of the server embedding the FSM
	Region string
}

// NewFSM is used to construct a new FSM with a blank state
func NewFSM(config *FSMConfig) (*nomadFSM, error) {
	// Create a state store
	sconfig := &state.StateStoreConfig{
		LogOutput: config.LogOutput,
		Region:    config.Region,
	}
	state, err := state.NewStateStore(sconfig)
	if err != nil {
		return nil, err
	}

	fsm := &nomadFSM{
		evalBroker:          config.EvalBroker,
		periodicDispatcher:  config.Periodic,
		blockedEvals:        config.Blocked,
		logger:              log.New(config.LogOutput, "", log.LstdFlags),
		config:              config,
		state:               state,
		timetable:           NewTimeTable(timeTableGranularity, timeTableLimit),
		enterpriseAppliers:  make(map[structs.MessageType]LogApplier, 8),
		enterpriseRestorers: make(map[SnapshotType]SnapshotRestorer, 8),
	}

	// Register all the log applier functions
	fsm.registerLogAppliers()

	// Register all the snapshot restorer functions
	fsm.registerSnapshotRestorers()

	return fsm, nil
}

// Close is used to cleanup resources associated with the FSM
func (n *nomadFSM) Close() error {
	return nil
}

// State is used to return a handle to the current state
func (n *nomadFSM) State() *state.StateStore {
	n.stateLock.RLock()
	defer n.stateLock.RUnlock()
	return n.state
}

// TimeTable returns the time table of transactions
func (n *nomadFSM) TimeTable() *TimeTable {
	return n.timetable
}

func (n *nomadFSM) Apply(log *raft.Log) interface{} {
	buf := log.Data
	msgType := structs.MessageType(buf[0])

	// Witness this write
	n.timetable.Witness(log.Index, time.Now().UTC())

	// Check if this message type should be ignored when unknown. This is
	// used so that new commands can be added with developer control if older
	// versions can safely ignore the command, or if they should crash.
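	// For illustration: a log entry whose first byte is
	// byte(someNewRequestType | structs.IgnoreUnknownTypeFlag), where
	// someNewRequestType is a hypothetical message type, is applied normally
	// by servers that recognize it and skipped with a warning by older
	// servers that do not.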
	ignoreUnknown := false
	if msgType&structs.IgnoreUnknownTypeFlag == structs.IgnoreUnknownTypeFlag {
		msgType &= ^structs.IgnoreUnknownTypeFlag
		ignoreUnknown = true
	}

	switch msgType {
	case structs.NodeRegisterRequestType:
		return n.applyUpsertNode(buf[1:], log.Index)
	case structs.NodeDeregisterRequestType:
		return n.applyDeregisterNode(buf[1:], log.Index)
	case structs.NodeUpdateStatusRequestType:
		return n.applyStatusUpdate(buf[1:], log.Index)
	case structs.NodeUpdateDrainRequestType:
		return n.applyDrainUpdate(buf[1:], log.Index)
	case structs.JobRegisterRequestType:
		return n.applyUpsertJob(buf[1:], log.Index)
	case structs.JobDeregisterRequestType:
		return n.applyDeregisterJob(buf[1:], log.Index)
	case structs.EvalUpdateRequestType:
		return n.applyUpdateEval(buf[1:], log.Index)
	case structs.EvalDeleteRequestType:
		return n.applyDeleteEval(buf[1:], log.Index)
	case structs.AllocUpdateRequestType:
		return n.applyAllocUpdate(buf[1:], log.Index)
	case structs.AllocClientUpdateRequestType:
		return n.applyAllocClientUpdate(buf[1:], log.Index)
	case structs.ReconcileJobSummariesRequestType:
		return n.applyReconcileSummaries(buf[1:], log.Index)
	case structs.VaultAccessorRegisterRequestType:
		return n.applyUpsertVaultAccessor(buf[1:], log.Index)
	case structs.VaultAccessorDegisterRequestType:
		return n.applyDeregisterVaultAccessor(buf[1:], log.Index)
	case structs.ApplyPlanResultsRequestType:
		return n.applyPlanResults(buf[1:], log.Index)
	case structs.DeploymentStatusUpdateRequestType:
		return n.applyDeploymentStatusUpdate(buf[1:], log.Index)
	case structs.DeploymentPromoteRequestType:
		return n.applyDeploymentPromotion(buf[1:], log.Index)
	case structs.DeploymentAllocHealthRequestType:
		return n.applyDeploymentAllocHealth(buf[1:], log.Index)
	case structs.DeploymentDeleteRequestType:
		return n.applyDeploymentDelete(buf[1:], log.Index)
	case structs.JobStabilityRequestType:
		return n.applyJobStability(buf[1:], log.Index)
	case structs.ACLPolicyUpsertRequestType:
		return n.applyACLPolicyUpsert(buf[1:], log.Index)
	case structs.ACLPolicyDeleteRequestType:
		return n.applyACLPolicyDelete(buf[1:], log.Index)
	case structs.ACLTokenUpsertRequestType:
		return n.applyACLTokenUpsert(buf[1:], log.Index)
	case structs.ACLTokenDeleteRequestType:
		return n.applyACLTokenDelete(buf[1:], log.Index)
	case structs.ACLTokenBootstrapRequestType:
		return n.applyACLTokenBootstrap(buf[1:], log.Index)
	case structs.AutopilotRequestType:
		return n.applyAutopilotUpdate(buf[1:], log.Index)
	}

	// Check enterprise only message types.
	if applier, ok := n.enterpriseAppliers[msgType]; ok {
		return applier(buf[1:], log.Index)
	}

	// We didn't match anything, either panic or ignore
	if ignoreUnknown {
		n.logger.Printf("[WARN] nomad.fsm: ignoring unknown message type (%d), upgrade to newer version", msgType)
		return nil
	}

	panic(fmt.Errorf("failed to apply request: %#v", buf))
}

func (n *nomadFSM) applyUpsertNode(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "register_node"}, time.Now())
	var req structs.NodeRegisterRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	if err := n.state.UpsertNode(index, req.Node); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: UpsertNode failed: %v", err)
		return err
	}

	// Unblock evals for the node's computed node class if it is in a ready
	// state.
	if req.Node.Status == structs.NodeStatusReady {
		n.blockedEvals.Unblock(req.Node.ComputedClass, index)
	}

	return nil
}

func (n *nomadFSM) applyDeregisterNode(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "deregister_node"}, time.Now())
	var req structs.NodeDeregisterRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	if err := n.state.DeleteNode(index, req.NodeID); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: DeleteNode failed: %v", err)
		return err
	}
	return nil
}

func (n *nomadFSM) applyStatusUpdate(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "node_status_update"}, time.Now())
	var req structs.NodeUpdateStatusRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	if err := n.state.UpdateNodeStatus(index, req.NodeID, req.Status); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: UpdateNodeStatus failed: %v", err)
		return err
	}

	// Unblock evals for the node's computed node class if it is in a ready
	// state.
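	// A node transitioning to ready adds capacity for its class, so wake any
	// evaluations blocked on that class here rather than waiting for the
	// next placement attempt.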
	if req.Status == structs.NodeStatusReady {
		ws := memdb.NewWatchSet()
		node, err := n.state.NodeByID(ws, req.NodeID)
		if err != nil {
			n.logger.Printf("[ERR] nomad.fsm: looking up node %q failed: %v", req.NodeID, err)
			return err
		}
		n.blockedEvals.Unblock(node.ComputedClass, index)
	}

	return nil
}

func (n *nomadFSM) applyDrainUpdate(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "node_drain_update"}, time.Now())
	var req structs.NodeUpdateDrainRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	if err := n.state.UpdateNodeDrain(index, req.NodeID, req.Drain); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: UpdateNodeDrain failed: %v", err)
		return err
	}
	return nil
}

func (n *nomadFSM) applyUpsertJob(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "register_job"}, time.Now())
	var req structs.JobRegisterRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	/* Handle upgrade paths:
	 * - Empty maps and slices should be treated as nil to avoid
	 *   unintended destructive updates in the scheduler since we use
	 *   reflect.DeepEqual. Starting with Nomad 0.4.1, job submission
	 *   sanitizes the incoming job.
	 * - Migrate from the old style upgrade stanza that used only a stagger.
	 */
	req.Job.Canonicalize()

	if err := n.state.UpsertJob(index, req.Job); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: UpsertJob failed: %v", err)
		return err
	}

	// We always add the job to the periodic dispatcher because there is the
	// possibility that the periodic spec was removed and then we should stop
	// tracking it.
	if err := n.periodicDispatcher.Add(req.Job); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: periodicDispatcher.Add failed: %v", err)
		return err
	}

	// Create a watch set
	ws := memdb.NewWatchSet()

	// If it is an active periodic job, record the time it was inserted. This
	// is necessary for recovering during leader election. It is possible that
	// from the time it is added to when it was supposed to launch, leader
	// election occurs and the job was not launched. In this case, we use the
	// insertion time to determine if a launch was missed.
	if req.Job.IsPeriodicActive() {
		prevLaunch, err := n.state.PeriodicLaunchByID(ws, req.Namespace, req.Job.ID)
		if err != nil {
			n.logger.Printf("[ERR] nomad.fsm: PeriodicLaunchByID failed: %v", err)
			return err
		}

		// Record the insertion time as a launch. We overload the launch table
		// such that the first entry is the insertion time.
		if prevLaunch == nil {
			launch := &structs.PeriodicLaunch{
				ID:        req.Job.ID,
				Namespace: req.Namespace,
				Launch:    time.Now(),
			}
			if err := n.state.UpsertPeriodicLaunch(index, launch); err != nil {
				n.logger.Printf("[ERR] nomad.fsm: UpsertPeriodicLaunch failed: %v", err)
				return err
			}
		}
	}

	// Check if the parent job is periodic and mark the launch time.
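	// This covers children dispatched by a periodic parent: registering the
	// child is the signal that the parent launched, so the parent's launch
	// time is recorded here.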
	parentID := req.Job.ParentID
	if parentID != "" {
		parent, err := n.state.JobByID(ws, req.Namespace, parentID)
		if err != nil {
			n.logger.Printf("[ERR] nomad.fsm: JobByID(%v) lookup for parent failed: %v", parentID, err)
			return err
		} else if parent == nil {
			// The parent has been deregistered.
			return nil
		}

		if parent.IsPeriodic() && !parent.IsParameterized() {
			t, err := n.periodicDispatcher.LaunchTime(req.Job.ID)
			if err != nil {
				n.logger.Printf("[ERR] nomad.fsm: LaunchTime(%v) failed: %v", req.Job.ID, err)
				return err
			}

			launch := &structs.PeriodicLaunch{
				ID:        parentID,
				Namespace: req.Namespace,
				Launch:    t,
			}
			if err := n.state.UpsertPeriodicLaunch(index, launch); err != nil {
				n.logger.Printf("[ERR] nomad.fsm: UpsertPeriodicLaunch failed: %v", err)
				return err
			}
		}
	}

	return nil
}

func (n *nomadFSM) applyDeregisterJob(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "deregister_job"}, time.Now())
	var req structs.JobDeregisterRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	// If it is periodic remove it from the dispatcher
	if err := n.periodicDispatcher.Remove(req.Namespace, req.JobID); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: periodicDispatcher.Remove failed: %v", err)
		return err
	}

	if req.Purge {
		if err := n.state.DeleteJob(index, req.Namespace, req.JobID); err != nil {
			n.logger.Printf("[ERR] nomad.fsm: DeleteJob failed: %v", err)
			return err
		}

		// We always delete from the periodic launch table because it is
		// possible that the job was updated to be non-periodic, thus checking
		// if it is periodic doesn't ensure we clean it up properly.
		n.state.DeletePeriodicLaunch(index, req.Namespace, req.JobID)
	} else {
		// Get the current job and mark it as stopped and re-insert it.
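		// A non-purge deregister keeps the job definition (and its version
		// history) in state; setting Stop lets the scheduler wind down the
		// job's allocations while it remains queryable.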
		ws := memdb.NewWatchSet()
		current, err := n.state.JobByID(ws, req.Namespace, req.JobID)
		if err != nil {
			n.logger.Printf("[ERR] nomad.fsm: JobByID lookup failed: %v", err)
			return err
		}

		if current == nil {
			return fmt.Errorf("job %q in namespace %q doesn't exist to be deregistered", req.JobID, req.Namespace)
		}

		stopped := current.Copy()
		stopped.Stop = true

		if err := n.state.UpsertJob(index, stopped); err != nil {
			n.logger.Printf("[ERR] nomad.fsm: UpsertJob failed: %v", err)
			return err
		}
	}

	return nil
}

func (n *nomadFSM) applyUpdateEval(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "update_eval"}, time.Now())
	var req structs.EvalUpdateRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}
	return n.upsertEvals(index, req.Evals)
}

func (n *nomadFSM) upsertEvals(index uint64, evals []*structs.Evaluation) error {
	if err := n.state.UpsertEvals(index, evals); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: UpsertEvals failed: %v", err)
		return err
	}

	for _, eval := range evals {
		if eval.ShouldEnqueue() {
			n.evalBroker.Enqueue(eval)
		} else if eval.ShouldBlock() {
			n.blockedEvals.Block(eval)
		} else if eval.Status == structs.EvalStatusComplete &&
			len(eval.FailedTGAllocs) == 0 {
			// If we have a successful evaluation for a node, untrack any
			// blocked evaluation
			n.blockedEvals.Untrack(eval.JobID)
		}
	}
	return nil
}

func (n *nomadFSM) applyDeleteEval(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "delete_eval"}, time.Now())
	var req structs.EvalDeleteRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	if err := n.state.DeleteEval(index, req.Evals, req.Allocs); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: DeleteEval failed: %v", err)
		return err
	}
	return nil
}

func (n *nomadFSM) applyAllocUpdate(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "alloc_update"}, time.Now())
	var req structs.AllocUpdateRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	// Attach the job to all the allocations. It is pulled out in the
	// payload to avoid the redundancy of encoding, but should be denormalized
	// prior to being inserted into MemDB.
	structs.DenormalizeAllocationJobs(req.Job, req.Alloc)

	// Calculate the total resources of allocations. It is pulled out in the
	// payload to avoid encoding something that can be computed, but should be
	// denormalized prior to being inserted into MemDB.
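	// An allocation's total Resources is the sum of its per-task resources
	// plus the shared (task-group level) resources. Allocations written by
	// 0.4.x servers already carry Resources but may lack SharedResources,
	// which is patched up below.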
	for _, alloc := range req.Alloc {
		if alloc.Resources != nil {
			// COMPAT 0.4.1 -> 0.5
			// Set the shared resources for allocations which don't have them
			if alloc.SharedResources == nil {
				alloc.SharedResources = &structs.Resources{
					DiskMB: alloc.Resources.DiskMB,
				}
			}

			continue
		}

		alloc.Resources = new(structs.Resources)
		for _, task := range alloc.TaskResources {
			alloc.Resources.Add(task)
		}

		// Add the shared resources
		alloc.Resources.Add(alloc.SharedResources)
	}

	if err := n.state.UpsertAllocs(index, req.Alloc); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: UpsertAllocs failed: %v", err)
		return err
	}
	return nil
}

func (n *nomadFSM) applyAllocClientUpdate(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "alloc_client_update"}, time.Now())
	var req structs.AllocUpdateRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}
	if len(req.Alloc) == 0 {
		return nil
	}

	// Create a watch set
	ws := memdb.NewWatchSet()

	// Update the allocs with the job id and task group name
	for _, alloc := range req.Alloc {
		if existing, _ := n.state.AllocByID(ws, alloc.ID); existing != nil {
			alloc.JobID = existing.JobID
			alloc.TaskGroup = existing.TaskGroup
		}
	}

	// Update all the client allocations
	if err := n.state.UpdateAllocsFromClient(index, req.Alloc); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: UpdateAllocsFromClient failed: %v", err)
		return err
	}

	// Update any evals
	if len(req.Evals) > 0 {
		if err := n.upsertEvals(index, req.Evals); err != nil {
			n.logger.Printf("[ERR] nomad.fsm: applyAllocClientUpdate failed to update evaluations: %v", err)
			return err
		}
	}

	// Unblock evals for the node's computed node class if the client has
	// finished running an allocation.
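	// A terminal allocation frees capacity on its node, so evaluations
	// blocked on that node's class, or on the allocation's quota, may now be
	// placeable.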
	for _, alloc := range req.Alloc {
		if alloc.ClientStatus == structs.AllocClientStatusComplete ||
			alloc.ClientStatus == structs.AllocClientStatusFailed {
			nodeID := alloc.NodeID
			node, err := n.state.NodeByID(ws, nodeID)
			if err != nil || node == nil {
				n.logger.Printf("[ERR] nomad.fsm: looking up node %q failed: %v", nodeID, err)
				return err
			}

			// Unblock any associated quota
			quota, err := n.allocQuota(alloc.ID)
			if err != nil {
				n.logger.Printf("[ERR] nomad.fsm: looking up quota associated with alloc %q failed: %v", alloc.ID, err)
				return err
			}

			n.blockedEvals.UnblockClassAndQuota(node.ComputedClass, quota, index)
		}
	}

	return nil
}

// applyReconcileSummaries reconciles summaries for all the jobs
func (n *nomadFSM) applyReconcileSummaries(buf []byte, index uint64) interface{} {
	if err := n.state.ReconcileJobSummaries(index); err != nil {
		return err
	}
	return n.reconcileQueuedAllocations(index)
}

// applyUpsertVaultAccessor stores the Vault accessors for a given allocation
// and task
func (n *nomadFSM) applyUpsertVaultAccessor(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "upsert_vault_accessor"}, time.Now())
	var req structs.VaultAccessorsRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	if err := n.state.UpsertVaultAccessor(index, req.Accessors); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: UpsertVaultAccessor failed: %v", err)
		return err
	}

	return nil
}

// applyDeregisterVaultAccessor deregisters a set of Vault accessors
func (n *nomadFSM) applyDeregisterVaultAccessor(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "deregister_vault_accessor"}, time.Now())
	var req structs.VaultAccessorsRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	if err := n.state.DeleteVaultAccessors(index, req.Accessors); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: DeregisterVaultAccessor failed: %v", err)
		return err
	}

	return nil
}

// applyPlanResults applies the results of a plan application
func (n *nomadFSM) applyPlanResults(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_plan_results"}, time.Now())
	var req structs.ApplyPlanResultsRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	if err := n.state.UpsertPlanResults(index, &req); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: ApplyPlan failed: %v", err)
		return err
	}

	return nil
}
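// The deployment appliers below share a pattern: apply the state transition,
// then enqueue any evaluation attached to the request so that follow-up
// scheduling happens as part of the same logical step.
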
// applyDeploymentStatusUpdate is used to update the status of an existing
// deployment
func (n *nomadFSM) applyDeploymentStatusUpdate(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_deployment_status_update"}, time.Now())
	var req structs.DeploymentStatusUpdateRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	if err := n.state.UpdateDeploymentStatus(index, &req); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: UpdateDeploymentStatus failed: %v", err)
		return err
	}

	if req.Eval != nil && req.Eval.ShouldEnqueue() {
		n.evalBroker.Enqueue(req.Eval)
	}

	return nil
}

// applyDeploymentPromotion is used to promote canaries in a deployment
func (n *nomadFSM) applyDeploymentPromotion(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_deployment_promotion"}, time.Now())
	var req structs.ApplyDeploymentPromoteRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	if err := n.state.UpdateDeploymentPromotion(index, &req); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: UpdateDeploymentPromotion failed: %v", err)
		return err
	}

	if req.Eval != nil && req.Eval.ShouldEnqueue() {
		n.evalBroker.Enqueue(req.Eval)
	}

	return nil
}

// applyDeploymentAllocHealth is used to set the health of allocations as part
// of a deployment
func (n *nomadFSM) applyDeploymentAllocHealth(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_deployment_alloc_health"}, time.Now())
	var req structs.ApplyDeploymentAllocHealthRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	if err := n.state.UpdateDeploymentAllocHealth(index, &req); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: UpdateDeploymentAllocHealth failed: %v", err)
		return err
	}

	if req.Eval != nil && req.Eval.ShouldEnqueue() {
		n.evalBroker.Enqueue(req.Eval)
	}

	return nil
}

// applyDeploymentDelete is used to delete a set of deployments
func (n *nomadFSM) applyDeploymentDelete(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_deployment_delete"}, time.Now())
	var req structs.DeploymentDeleteRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	if err := n.state.DeleteDeployment(index, req.Deployments); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: DeleteDeployment failed: %v", err)
		return err
	}

	return nil
}

// applyJobStability is used to set the stability of a job
func (n *nomadFSM) applyJobStability(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_job_stability"}, time.Now())
	var req structs.JobStabilityRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	if err := n.state.UpdateJobStability(index, req.Namespace, req.JobID, req.JobVersion, req.Stable); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: UpdateJobStability failed: %v", err)
		return err
	}

	return nil
}

// applyACLPolicyUpsert is used to upsert a set of policies
func (n *nomadFSM) applyACLPolicyUpsert(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_acl_policy_upsert"}, time.Now())
	var req structs.ACLPolicyUpsertRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	if err := n.state.UpsertACLPolicies(index, req.Policies); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: UpsertACLPolicies failed: %v", err)
		return err
	}
	return nil
}
// applyACLPolicyDelete is used to delete a set of policies
func (n *nomadFSM) applyACLPolicyDelete(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_acl_policy_delete"}, time.Now())
	var req structs.ACLPolicyDeleteRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	if err := n.state.DeleteACLPolicies(index, req.Names); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: DeleteACLPolicies failed: %v", err)
		return err
	}
	return nil
}

// applyACLTokenUpsert is used to upsert a set of tokens
func (n *nomadFSM) applyACLTokenUpsert(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_acl_token_upsert"}, time.Now())
	var req structs.ACLTokenUpsertRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	if err := n.state.UpsertACLTokens(index, req.Tokens); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: UpsertACLTokens failed: %v", err)
		return err
	}
	return nil
}

// applyACLTokenDelete is used to delete a set of tokens
func (n *nomadFSM) applyACLTokenDelete(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_acl_token_delete"}, time.Now())
	var req structs.ACLTokenDeleteRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	if err := n.state.DeleteACLTokens(index, req.AccessorIDs); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: DeleteACLTokens failed: %v", err)
		return err
	}
	return nil
}

// applyACLTokenBootstrap is used to bootstrap an ACL token
func (n *nomadFSM) applyACLTokenBootstrap(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_acl_token_bootstrap"}, time.Now())
	var req structs.ACLTokenBootstrapRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	if err := n.state.BootstrapACLTokens(index, req.ResetIndex, req.Token); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: BootstrapACLTokens failed: %v", err)
		return err
	}
	return nil
}

func (n *nomadFSM) applyAutopilotUpdate(buf []byte, index uint64) interface{} {
	var req structs.AutopilotSetConfigRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}
	defer metrics.MeasureSince([]string{"nomad", "fsm", "autopilot"}, time.Now())

	if req.CAS {
		act, err := n.state.AutopilotCASConfig(index, req.Config.ModifyIndex, &req.Config)
		if err != nil {
			return err
		}
		return act
	}
	return n.state.AutopilotSetConfig(index, &req.Config)
}

func (n *nomadFSM) Snapshot() (raft.FSMSnapshot, error) {
	// Create a new snapshot
	snap, err := n.state.Snapshot()
	if err != nil {
		return nil, err
	}

	ns := &nomadSnapshot{
		snap:      snap,
		timetable: n.timetable,
	}
	return ns, nil
}
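// Restore rebuilds the state store from a snapshot stream: a msgpack-encoded
// snapshotHeader followed by a sequence of records, each a single
// SnapshotType byte and then the msgpack encoding of the corresponding
// object, until EOF. Persist below writes the same format.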
func (n *nomadFSM) Restore(old io.ReadCloser) error {
	defer old.Close()

	// Create a new state store
	config := &state.StateStoreConfig{
		LogOutput: n.config.LogOutput,
		Region:    n.config.Region,
	}
	newState, err := state.NewStateStore(config)
	if err != nil {
		return err
	}

	// Start the state restore
	restore, err := newState.Restore()
	if err != nil {
		return err
	}
	defer restore.Abort()

	// Create a decoder
	dec := codec.NewDecoder(old, structs.MsgpackHandle)

	// Read in the header
	var header snapshotHeader
	if err := dec.Decode(&header); err != nil {
		return err
	}

	// Populate the new state
	msgType := make([]byte, 1)
	for {
		// Read the message type
		_, err := old.Read(msgType)
		if err == io.EOF {
			break
		} else if err != nil {
			return err
		}

		// Decode
		snapType := SnapshotType(msgType[0])
		switch snapType {
		case TimeTableSnapshot:
			if err := n.timetable.Deserialize(dec); err != nil {
				return fmt.Errorf("time table deserialize failed: %v", err)
			}

		case NodeSnapshot:
			node := new(structs.Node)
			if err := dec.Decode(node); err != nil {
				return err
			}
			if err := restore.NodeRestore(node); err != nil {
				return err
			}

		case JobSnapshot:
			job := new(structs.Job)
			if err := dec.Decode(job); err != nil {
				return err
			}

			/* Handle upgrade paths:
			 * - Empty maps and slices should be treated as nil to avoid
			 *   unintended destructive updates in the scheduler since we use
			 *   reflect.DeepEqual. Starting with Nomad 0.4.1, job submission
			 *   sanitizes the incoming job.
			 * - Migrate from the old style upgrade stanza that used only a
			 *   stagger.
			 */
			job.Canonicalize()

			if err := restore.JobRestore(job); err != nil {
				return err
			}

		case EvalSnapshot:
			eval := new(structs.Evaluation)
			if err := dec.Decode(eval); err != nil {
				return err
			}

			// COMPAT: Handle upgrade to v0.7.0
			if eval.Namespace == "" {
				eval.Namespace = structs.DefaultNamespace
			}

			if err := restore.EvalRestore(eval); err != nil {
				return err
			}

		case AllocSnapshot:
			alloc := new(structs.Allocation)
			if err := dec.Decode(alloc); err != nil {
				return err
			}

			// COMPAT: Handle upgrade to v0.7.0
			if alloc.Namespace == "" {
				alloc.Namespace = structs.DefaultNamespace
			}

			if err := restore.AllocRestore(alloc); err != nil {
				return err
			}

		case IndexSnapshot:
			idx := new(state.IndexEntry)
			if err := dec.Decode(idx); err != nil {
				return err
			}
			if err := restore.IndexRestore(idx); err != nil {
				return err
			}

		case PeriodicLaunchSnapshot:
			launch := new(structs.PeriodicLaunch)
			if err := dec.Decode(launch); err != nil {
				return err
			}

			// COMPAT: Handle upgrade to v0.7.0
			if launch.Namespace == "" {
				launch.Namespace = structs.DefaultNamespace
			}

			if err := restore.PeriodicLaunchRestore(launch); err != nil {
				return err
			}

		case JobSummarySnapshot:
			summary := new(structs.JobSummary)
			if err := dec.Decode(summary); err != nil {
				return err
			}

			// COMPAT: Handle upgrade to v0.7.0
			if summary.Namespace == "" {
				summary.Namespace = structs.DefaultNamespace
			}

			if err := restore.JobSummaryRestore(summary); err != nil {
				return err
			}

		case VaultAccessorSnapshot:
			accessor := new(structs.VaultAccessor)
			if err := dec.Decode(accessor); err != nil {
				return err
			}
			if err := restore.VaultAccessorRestore(accessor); err != nil {
				return err
			}

		case JobVersionSnapshot:
			version := new(structs.Job)
			if err := dec.Decode(version); err != nil {
				return err
			}

			// COMPAT: Handle upgrade to v0.7.0
			if version.Namespace == "" {
				version.Namespace = structs.DefaultNamespace
			}

			if err := restore.JobVersionRestore(version); err != nil {
				return err
			}

		case DeploymentSnapshot:
			deployment := new(structs.Deployment)
			if err := dec.Decode(deployment); err != nil {
				return err
			}

			// COMPAT: Handle upgrade to v0.7.0
			if deployment.Namespace == "" {
				deployment.Namespace = structs.DefaultNamespace
			}

			if err := restore.DeploymentRestore(deployment); err != nil {
				return err
			}

		case ACLPolicySnapshot:
			policy := new(structs.ACLPolicy)
			if err := dec.Decode(policy); err != nil {
				return err
			}
			if err := restore.ACLPolicyRestore(policy); err != nil {
				return err
			}

		case ACLTokenSnapshot:
			token := new(structs.ACLToken)
			if err := dec.Decode(token); err != nil {
				return err
			}
			if err := restore.ACLTokenRestore(token); err != nil {
				return err
			}

		default:
			// Check if this is an enterprise only object being restored
			restorer, ok := n.enterpriseRestorers[snapType]
			if !ok {
				return fmt.Errorf("Unrecognized snapshot type: %v", snapType)
			}

			// Restore the enterprise only object
			if err := restorer(restore, dec); err != nil {
				return err
			}
		}
	}

	restore.Commit()

	// Create Job Summaries
	// COMPAT 0.4 -> 0.4.1
	// We can remove this in 0.5. This exists so that the server creates job
	// summaries if they were not present previously. When users upgrade to
	// 0.5 from 0.4.1, the snapshot will contain job summaries so it will be
	// safe to remove this block.
	index, err := newState.Index("job_summary")
	if err != nil {
		return fmt.Errorf("couldn't fetch index of job summary table: %v", err)
	}

	// If the index is 0 that means there is no job summary in the snapshot so
	// we will have to create them
	if index == 0 {
		// query the latest index
		latestIndex, err := newState.LatestIndex()
		if err != nil {
			return fmt.Errorf("unable to query latest index: %v", err)
		}
		if err := newState.ReconcileJobSummaries(latestIndex); err != nil {
			return fmt.Errorf("error reconciling summaries: %v", err)
		}
	}

	// External code might be calling State(), so we need to synchronize
	// here to make sure we swap in the new state store atomically.
	n.stateLock.Lock()
	stateOld := n.state
	n.state = newState
	n.stateLock.Unlock()

	// Signal that the old state store has been abandoned. This is required
	// because we don't operate on it any more, we just throw it away, so
	// blocking queries won't see any changes and need to be woken up.
	stateOld.Abandon()

	return nil
}

// reconcileQueuedAllocations re-calculates the queued allocations for every
// job for which we created a Job Summary during the snapshot restore.
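// The jobs are run through the scheduler against a snapshot of the restored
// state with a Harness (mock) planner, so nothing is actually placed; the
// plan and evaluation the scheduler produces tell us how many allocations
// would still be queued.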
func (n *nomadFSM) reconcileQueuedAllocations(index uint64) error {
	// Get all the jobs
	ws := memdb.NewWatchSet()
	iter, err := n.state.Jobs(ws)
	if err != nil {
		return err
	}

	snap, err := n.state.Snapshot()
	if err != nil {
		return fmt.Errorf("unable to create snapshot: %v", err)
	}

	// Invoking the scheduler for every job so that we can populate the number
	// of queued allocations for every job
	for {
		rawJob := iter.Next()
		if rawJob == nil {
			break
		}
		job := rawJob.(*structs.Job)
		planner := &scheduler.Harness{
			State: &snap.StateStore,
		}
		// Create an eval and mark it as requiring annotations and insert that as well
		eval := &structs.Evaluation{
			ID:             uuid.Generate(),
			Namespace:      job.Namespace,
			Priority:       job.Priority,
			Type:           job.Type,
			TriggeredBy:    structs.EvalTriggerJobRegister,
			JobID:          job.ID,
			JobModifyIndex: job.JobModifyIndex + 1,
			Status:         structs.EvalStatusPending,
			AnnotatePlan:   true,
		}
		snap.UpsertEvals(100, []*structs.Evaluation{eval})
		// Create the scheduler and run it
		sched, err := scheduler.NewScheduler(eval.Type, n.logger, snap, planner)
		if err != nil {
			return err
		}

		if err := sched.Process(eval); err != nil {
			return err
		}

		// Get the job summary from the fsm state store
		originalSummary, err := n.state.JobSummaryByID(ws, job.Namespace, job.ID)
		if err != nil {
			return err
		}
		summary := originalSummary.Copy()

		// Add the allocations the scheduler has made to queued since these
		// allocations are never getting placed until the scheduler is invoked
		// with a real planner
		if l := len(planner.Plans); l != 1 {
			return fmt.Errorf("unexpected number of plans during restore %d. Please file an issue including the logs", l)
		}
		for _, allocations := range planner.Plans[0].NodeAllocation {
			for _, allocation := range allocations {
				tgSummary, ok := summary.Summary[allocation.TaskGroup]
				if !ok {
					return fmt.Errorf("task group %q not found while updating queued count", allocation.TaskGroup)
				}
				tgSummary.Queued += 1
				summary.Summary[allocation.TaskGroup] = tgSummary
			}
		}

		// Add the queued allocations attached to the evaluation to the queued
		// counter of the job summary
		if l := len(planner.Evals); l != 1 {
			return fmt.Errorf("unexpected number of evals during restore %d. Please file an issue including the logs", l)
		}
		for tg, queued := range planner.Evals[0].QueuedAllocations {
			tgSummary, ok := summary.Summary[tg]
			if !ok {
				return fmt.Errorf("task group %q not found while updating queued count", tg)
			}

			// We add instead of setting here because we want to take into
			// consideration what the scheduler with a mock planner thinks it
			// placed. Those should be counted as queued as well.
			tgSummary.Queued += queued
			summary.Summary[tg] = tgSummary
		}

		if !reflect.DeepEqual(summary, originalSummary) {
			summary.ModifyIndex = index
			if err := n.state.UpsertJobSummary(index, summary); err != nil {
				return err
			}
		}
	}
	return nil
}
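// Persist writes the snapshot in the record format consumed by Restore: the
// header first, then the time table, then one typed record per object. Any
// encode error cancels the sink so the partial snapshot is discarded.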
func (s *nomadSnapshot) Persist(sink raft.SnapshotSink) error {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "persist"}, time.Now())
	// Create an encoder
	encoder := codec.NewEncoder(sink, structs.MsgpackHandle)

	// Write the header
	header := snapshotHeader{}
	if err := encoder.Encode(&header); err != nil {
		sink.Cancel()
		return err
	}

	// Write the time table
	sink.Write([]byte{byte(TimeTableSnapshot)})
	if err := s.timetable.Serialize(encoder); err != nil {
		sink.Cancel()
		return err
	}

	// Write all the data out
	if err := s.persistIndexes(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistNodes(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistJobs(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistEvals(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistAllocs(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistPeriodicLaunches(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistJobSummaries(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistVaultAccessors(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistJobVersions(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistDeployments(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistACLPolicies(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistACLTokens(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistEnterpriseTables(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	return nil
}

func (s *nomadSnapshot) persistIndexes(sink raft.SnapshotSink,
	encoder *codec.Encoder) error {
	// Get all the indexes
	iter, err := s.snap.Indexes()
	if err != nil {
		return err
	}

	for {
		// Get the next item
		raw := iter.Next()
		if raw == nil {
			break
		}

		// Prepare the request struct
		idx := raw.(*state.IndexEntry)

		// Write out an index entry
		sink.Write([]byte{byte(IndexSnapshot)})
		if err := encoder.Encode(idx); err != nil {
			return err
		}
	}
	return nil
}

func (s *nomadSnapshot) persistNodes(sink raft.SnapshotSink,
	encoder *codec.Encoder) error {
	// Get all the nodes
	ws := memdb.NewWatchSet()
	nodes, err := s.snap.Nodes(ws)
	if err != nil {
		return err
	}

	for {
		// Get the next item
		raw := nodes.Next()
		if raw == nil {
			break
		}

		// Prepare the request struct
		node := raw.(*structs.Node)

		// Write out a node registration
		sink.Write([]byte{byte(NodeSnapshot)})
		if err := encoder.Encode(node); err != nil {
			return err
		}
	}
	return nil
}

func (s *nomadSnapshot) persistJobs(sink raft.SnapshotSink,
	encoder *codec.Encoder) error {
	// Get all the jobs
	ws := memdb.NewWatchSet()
	jobs, err := s.snap.Jobs(ws)
	if err != nil {
		return err
	}

	for {
		// Get the next item
		raw := jobs.Next()
		if raw == nil {
			break
		}

		// Prepare the request struct
		job := raw.(*structs.Job)

		// Write out a job registration
		sink.Write([]byte{byte(JobSnapshot)})
		if err := encoder.Encode(job); err != nil {
			return err
		}
	}
	return nil
}

func (s *nomadSnapshot) persistEvals(sink raft.SnapshotSink,
	encoder *codec.Encoder) error {
	// Get all the evaluations
	ws := memdb.NewWatchSet()
	evals, err := s.snap.Evals(ws)
	if err != nil {
		return err
	}

	for {
		// Get the next item
		raw := evals.Next()
		if raw == nil {
			break
		}

		// Prepare the request struct
		eval := raw.(*structs.Evaluation)

		// Write out the evaluation
		sink.Write([]byte{byte(EvalSnapshot)})
		if err := encoder.Encode(eval); err != nil {
			return err
		}
	}
	return nil
}

func (s *nomadSnapshot) persistAllocs(sink raft.SnapshotSink,
	encoder *codec.Encoder) error {
	// Get all the allocations
	ws := memdb.NewWatchSet()
	allocs, err := s.snap.Allocs(ws)
	if err != nil {
		return err
	}

	for {
		// Get the next item
		raw := allocs.Next()
		if raw == nil {
			break
		}

		// Prepare the request struct
		alloc := raw.(*structs.Allocation)

		// Write out the allocation
		sink.Write([]byte{byte(AllocSnapshot)})
		if err := encoder.Encode(alloc); err != nil {
			return err
		}
	}
	return nil
}

func (s *nomadSnapshot) persistPeriodicLaunches(sink raft.SnapshotSink,
	encoder *codec.Encoder) error {
	// Get all the periodic launches
	ws := memdb.NewWatchSet()
	launches, err := s.snap.PeriodicLaunches(ws)
	if err != nil {
		return err
	}

	for {
		// Get the next item
		raw := launches.Next()
		if raw == nil {
			break
		}

		// Prepare the request struct
		launch := raw.(*structs.PeriodicLaunch)

		// Write out a periodic launch
		sink.Write([]byte{byte(PeriodicLaunchSnapshot)})
		if err := encoder.Encode(launch); err != nil {
			return err
		}
	}
	return nil
}

func (s *nomadSnapshot) persistJobSummaries(sink raft.SnapshotSink,
	encoder *codec.Encoder) error {

	ws := memdb.NewWatchSet()
	summaries, err := s.snap.JobSummaries(ws)
	if err != nil {
		return err
	}

	for {
		raw := summaries.Next()
		if raw == nil {
			break
		}

		jobSummary := raw.(*structs.JobSummary)

		sink.Write([]byte{byte(JobSummarySnapshot)})
		if err := encoder.Encode(jobSummary); err != nil {
			return err
		}
	}
	return nil
}
func (s *nomadSnapshot) persistVaultAccessors(sink raft.SnapshotSink,
	encoder *codec.Encoder) error {

	ws := memdb.NewWatchSet()
	accessors, err := s.snap.VaultAccessors(ws)
	if err != nil {
		return err
	}

	for {
		raw := accessors.Next()
		if raw == nil {
			break
		}

		accessor := raw.(*structs.VaultAccessor)

		sink.Write([]byte{byte(VaultAccessorSnapshot)})
		if err := encoder.Encode(accessor); err != nil {
			return err
		}
	}
	return nil
}

func (s *nomadSnapshot) persistJobVersions(sink raft.SnapshotSink,
	encoder *codec.Encoder) error {
	// Get all the job versions
	ws := memdb.NewWatchSet()
	versions, err := s.snap.JobVersions(ws)
	if err != nil {
		return err
	}

	for {
		// Get the next item
		raw := versions.Next()
		if raw == nil {
			break
		}

		// Prepare the request struct
		job := raw.(*structs.Job)

		// Write out a job version
		sink.Write([]byte{byte(JobVersionSnapshot)})
		if err := encoder.Encode(job); err != nil {
			return err
		}
	}
	return nil
}

func (s *nomadSnapshot) persistDeployments(sink raft.SnapshotSink,
	encoder *codec.Encoder) error {
	// Get all the deployments
	ws := memdb.NewWatchSet()
	deployments, err := s.snap.Deployments(ws)
	if err != nil {
		return err
	}

	for {
		// Get the next item
		raw := deployments.Next()
		if raw == nil {
			break
		}

		// Prepare the request struct
		deployment := raw.(*structs.Deployment)

		// Write out a deployment
		sink.Write([]byte{byte(DeploymentSnapshot)})
		if err := encoder.Encode(deployment); err != nil {
			return err
		}
	}
	return nil
}

func (s *nomadSnapshot) persistACLPolicies(sink raft.SnapshotSink,
	encoder *codec.Encoder) error {
	// Get all the policies
	ws := memdb.NewWatchSet()
	policies, err := s.snap.ACLPolicies(ws)
	if err != nil {
		return err
	}

	for {
		// Get the next item
		raw := policies.Next()
		if raw == nil {
			break
		}

		// Prepare the request struct
		policy := raw.(*structs.ACLPolicy)

		// Write out a policy registration
		sink.Write([]byte{byte(ACLPolicySnapshot)})
		if err := encoder.Encode(policy); err != nil {
			return err
		}
	}
	return nil
}

func (s *nomadSnapshot) persistACLTokens(sink raft.SnapshotSink,
	encoder *codec.Encoder) error {
	// Get all the tokens
	ws := memdb.NewWatchSet()
	tokens, err := s.snap.ACLTokens(ws)
	if err != nil {
		return err
	}

	for {
		// Get the next item
		raw := tokens.Next()
		if raw == nil {
			break
		}

		// Prepare the request struct
		token := raw.(*structs.ACLToken)

		// Write out a token registration
		sink.Write([]byte{byte(ACLTokenSnapshot)})
		if err := encoder.Encode(token); err != nil {
			return err
		}
	}
	return nil
}

// Release is a no-op, as we just need to GC the pointer
// to the state store snapshot. There is nothing to explicitly
// clean up.
func (s *nomadSnapshot) Release() {}