github.com/blixtra/nomad@v0.7.2-0.20171221000451-da9a1d7bb050/nomad/fsm.go (about) 1 package nomad 2 3 import ( 4 "fmt" 5 "io" 6 "log" 7 "reflect" 8 "sync" 9 "time" 10 11 "github.com/armon/go-metrics" 12 memdb "github.com/hashicorp/go-memdb" 13 "github.com/hashicorp/nomad/helper/uuid" 14 "github.com/hashicorp/nomad/nomad/state" 15 "github.com/hashicorp/nomad/nomad/structs" 16 "github.com/hashicorp/nomad/scheduler" 17 "github.com/hashicorp/raft" 18 "github.com/ugorji/go/codec" 19 ) 20 21 const ( 22 // timeTableGranularity is the granularity of index to time tracking 23 timeTableGranularity = 5 * time.Minute 24 25 // timeTableLimit is the maximum limit of our tracking 26 timeTableLimit = 72 * time.Hour 27 ) 28 29 // SnapshotType is prefixed to a record in the FSM snapshot 30 // so that we can determine the type for restore 31 type SnapshotType byte 32 33 const ( 34 NodeSnapshot SnapshotType = iota 35 JobSnapshot 36 IndexSnapshot 37 EvalSnapshot 38 AllocSnapshot 39 TimeTableSnapshot 40 PeriodicLaunchSnapshot 41 JobSummarySnapshot 42 VaultAccessorSnapshot 43 JobVersionSnapshot 44 DeploymentSnapshot 45 ACLPolicySnapshot 46 ACLTokenSnapshot 47 ) 48 49 // LogApplier is the definition of a function that can apply a Raft log 50 type LogApplier func(buf []byte, index uint64) interface{} 51 52 // LogAppliers is a mapping of the Raft MessageType to the appropriate log 53 // applier 54 type LogAppliers map[structs.MessageType]LogApplier 55 56 // SnapshotRestorer is the definition of a function that can apply a Raft log 57 type SnapshotRestorer func(restore *state.StateRestore, dec *codec.Decoder) error 58 59 // SnapshotRestorers is a mapping of the SnapshotType to the appropriate 60 // snapshot restorer. 61 type SnapshotRestorers map[SnapshotType]SnapshotRestorer 62 63 // nomadFSM implements a finite state machine that is used 64 // along with Raft to provide strong consistency. We implement 65 // this outside the Server to avoid exposing this outside the package. 66 type nomadFSM struct { 67 evalBroker *EvalBroker 68 blockedEvals *BlockedEvals 69 periodicDispatcher *PeriodicDispatch 70 logger *log.Logger 71 state *state.StateStore 72 timetable *TimeTable 73 74 // config is the FSM config 75 config *FSMConfig 76 77 // enterpriseAppliers holds the set of enterprise only LogAppliers 78 enterpriseAppliers LogAppliers 79 80 // enterpriseRestorers holds the set of enterprise only snapshot restorers 81 enterpriseRestorers SnapshotRestorers 82 83 // stateLock is only used to protect outside callers to State() from 84 // racing with Restore(), which is called by Raft (it puts in a totally 85 // new state store). Everything internal here is synchronized by the 86 // Raft side, so doesn't need to lock this. 87 stateLock sync.RWMutex 88 } 89 90 // nomadSnapshot is used to provide a snapshot of the current 91 // state in a way that can be accessed concurrently with operations 92 // that may modify the live state. 93 type nomadSnapshot struct { 94 snap *state.StateSnapshot 95 timetable *TimeTable 96 } 97 98 // snapshotHeader is the first entry in our snapshot 99 type snapshotHeader struct { 100 } 101 102 // FSMConfig is used to configure the FSM 103 type FSMConfig struct { 104 // EvalBroker is the evaluation broker evaluations should be added to 105 EvalBroker *EvalBroker 106 107 // Periodic is the periodic job dispatcher that periodic jobs should be 108 // added/removed from 109 Periodic *PeriodicDispatch 110 111 // BlockedEvals is the blocked eval tracker that blocked evaulations should 112 // be added to. 113 Blocked *BlockedEvals 114 115 // LogOutput is the writer logs should be written to 116 LogOutput io.Writer 117 118 // Region is the region of the server embedding the FSM 119 Region string 120 } 121 122 // NewFSMPath is used to construct a new FSM with a blank state 123 func NewFSM(config *FSMConfig) (*nomadFSM, error) { 124 // Create a state store 125 sconfig := &state.StateStoreConfig{ 126 LogOutput: config.LogOutput, 127 Region: config.Region, 128 } 129 state, err := state.NewStateStore(sconfig) 130 if err != nil { 131 return nil, err 132 } 133 134 fsm := &nomadFSM{ 135 evalBroker: config.EvalBroker, 136 periodicDispatcher: config.Periodic, 137 blockedEvals: config.Blocked, 138 logger: log.New(config.LogOutput, "", log.LstdFlags), 139 config: config, 140 state: state, 141 timetable: NewTimeTable(timeTableGranularity, timeTableLimit), 142 enterpriseAppliers: make(map[structs.MessageType]LogApplier, 8), 143 enterpriseRestorers: make(map[SnapshotType]SnapshotRestorer, 8), 144 } 145 146 // Register all the log applier functions 147 fsm.registerLogAppliers() 148 149 // Register all the snapshot restorer functions 150 fsm.registerSnapshotRestorers() 151 152 return fsm, nil 153 } 154 155 // Close is used to cleanup resources associated with the FSM 156 func (n *nomadFSM) Close() error { 157 return nil 158 } 159 160 // State is used to return a handle to the current state 161 func (n *nomadFSM) State() *state.StateStore { 162 n.stateLock.RLock() 163 defer n.stateLock.RUnlock() 164 return n.state 165 } 166 167 // TimeTable returns the time table of transactions 168 func (n *nomadFSM) TimeTable() *TimeTable { 169 return n.timetable 170 } 171 172 func (n *nomadFSM) Apply(log *raft.Log) interface{} { 173 buf := log.Data 174 msgType := structs.MessageType(buf[0]) 175 176 // Witness this write 177 n.timetable.Witness(log.Index, time.Now().UTC()) 178 179 // Check if this message type should be ignored when unknown. This is 180 // used so that new commands can be added with developer control if older 181 // versions can safely ignore the command, or if they should crash. 182 ignoreUnknown := false 183 if msgType&structs.IgnoreUnknownTypeFlag == structs.IgnoreUnknownTypeFlag { 184 msgType &= ^structs.IgnoreUnknownTypeFlag 185 ignoreUnknown = true 186 } 187 188 switch msgType { 189 case structs.NodeRegisterRequestType: 190 return n.applyUpsertNode(buf[1:], log.Index) 191 case structs.NodeDeregisterRequestType: 192 return n.applyDeregisterNode(buf[1:], log.Index) 193 case structs.NodeUpdateStatusRequestType: 194 return n.applyStatusUpdate(buf[1:], log.Index) 195 case structs.NodeUpdateDrainRequestType: 196 return n.applyDrainUpdate(buf[1:], log.Index) 197 case structs.JobRegisterRequestType: 198 return n.applyUpsertJob(buf[1:], log.Index) 199 case structs.JobDeregisterRequestType: 200 return n.applyDeregisterJob(buf[1:], log.Index) 201 case structs.EvalUpdateRequestType: 202 return n.applyUpdateEval(buf[1:], log.Index) 203 case structs.EvalDeleteRequestType: 204 return n.applyDeleteEval(buf[1:], log.Index) 205 case structs.AllocUpdateRequestType: 206 return n.applyAllocUpdate(buf[1:], log.Index) 207 case structs.AllocClientUpdateRequestType: 208 return n.applyAllocClientUpdate(buf[1:], log.Index) 209 case structs.ReconcileJobSummariesRequestType: 210 return n.applyReconcileSummaries(buf[1:], log.Index) 211 case structs.VaultAccessorRegisterRequestType: 212 return n.applyUpsertVaultAccessor(buf[1:], log.Index) 213 case structs.VaultAccessorDegisterRequestType: 214 return n.applyDeregisterVaultAccessor(buf[1:], log.Index) 215 case structs.ApplyPlanResultsRequestType: 216 return n.applyPlanResults(buf[1:], log.Index) 217 case structs.DeploymentStatusUpdateRequestType: 218 return n.applyDeploymentStatusUpdate(buf[1:], log.Index) 219 case structs.DeploymentPromoteRequestType: 220 return n.applyDeploymentPromotion(buf[1:], log.Index) 221 case structs.DeploymentAllocHealthRequestType: 222 return n.applyDeploymentAllocHealth(buf[1:], log.Index) 223 case structs.DeploymentDeleteRequestType: 224 return n.applyDeploymentDelete(buf[1:], log.Index) 225 case structs.JobStabilityRequestType: 226 return n.applyJobStability(buf[1:], log.Index) 227 case structs.ACLPolicyUpsertRequestType: 228 return n.applyACLPolicyUpsert(buf[1:], log.Index) 229 case structs.ACLPolicyDeleteRequestType: 230 return n.applyACLPolicyDelete(buf[1:], log.Index) 231 case structs.ACLTokenUpsertRequestType: 232 return n.applyACLTokenUpsert(buf[1:], log.Index) 233 case structs.ACLTokenDeleteRequestType: 234 return n.applyACLTokenDelete(buf[1:], log.Index) 235 case structs.ACLTokenBootstrapRequestType: 236 return n.applyACLTokenBootstrap(buf[1:], log.Index) 237 } 238 239 // Check enterprise only message types. 240 if applier, ok := n.enterpriseAppliers[msgType]; ok { 241 return applier(buf[1:], log.Index) 242 } 243 244 // We didn't match anything, either panic or ignore 245 if ignoreUnknown { 246 n.logger.Printf("[WARN] nomad.fsm: ignoring unknown message type (%d), upgrade to newer version", msgType) 247 return nil 248 } 249 250 panic(fmt.Errorf("failed to apply request: %#v", buf)) 251 } 252 253 func (n *nomadFSM) applyUpsertNode(buf []byte, index uint64) interface{} { 254 defer metrics.MeasureSince([]string{"nomad", "fsm", "register_node"}, time.Now()) 255 var req structs.NodeRegisterRequest 256 if err := structs.Decode(buf, &req); err != nil { 257 panic(fmt.Errorf("failed to decode request: %v", err)) 258 } 259 260 if err := n.state.UpsertNode(index, req.Node); err != nil { 261 n.logger.Printf("[ERR] nomad.fsm: UpsertNode failed: %v", err) 262 return err 263 } 264 265 // Unblock evals for the nodes computed node class if it is in a ready 266 // state. 267 if req.Node.Status == structs.NodeStatusReady { 268 n.blockedEvals.Unblock(req.Node.ComputedClass, index) 269 } 270 271 return nil 272 } 273 274 func (n *nomadFSM) applyDeregisterNode(buf []byte, index uint64) interface{} { 275 defer metrics.MeasureSince([]string{"nomad", "fsm", "deregister_node"}, time.Now()) 276 var req structs.NodeDeregisterRequest 277 if err := structs.Decode(buf, &req); err != nil { 278 panic(fmt.Errorf("failed to decode request: %v", err)) 279 } 280 281 if err := n.state.DeleteNode(index, req.NodeID); err != nil { 282 n.logger.Printf("[ERR] nomad.fsm: DeleteNode failed: %v", err) 283 return err 284 } 285 return nil 286 } 287 288 func (n *nomadFSM) applyStatusUpdate(buf []byte, index uint64) interface{} { 289 defer metrics.MeasureSince([]string{"nomad", "fsm", "node_status_update"}, time.Now()) 290 var req structs.NodeUpdateStatusRequest 291 if err := structs.Decode(buf, &req); err != nil { 292 panic(fmt.Errorf("failed to decode request: %v", err)) 293 } 294 295 if err := n.state.UpdateNodeStatus(index, req.NodeID, req.Status); err != nil { 296 n.logger.Printf("[ERR] nomad.fsm: UpdateNodeStatus failed: %v", err) 297 return err 298 } 299 300 // Unblock evals for the nodes computed node class if it is in a ready 301 // state. 302 if req.Status == structs.NodeStatusReady { 303 ws := memdb.NewWatchSet() 304 node, err := n.state.NodeByID(ws, req.NodeID) 305 if err != nil { 306 n.logger.Printf("[ERR] nomad.fsm: looking up node %q failed: %v", req.NodeID, err) 307 return err 308 309 } 310 n.blockedEvals.Unblock(node.ComputedClass, index) 311 } 312 313 return nil 314 } 315 316 func (n *nomadFSM) applyDrainUpdate(buf []byte, index uint64) interface{} { 317 defer metrics.MeasureSince([]string{"nomad", "fsm", "node_drain_update"}, time.Now()) 318 var req structs.NodeUpdateDrainRequest 319 if err := structs.Decode(buf, &req); err != nil { 320 panic(fmt.Errorf("failed to decode request: %v", err)) 321 } 322 323 if err := n.state.UpdateNodeDrain(index, req.NodeID, req.Drain); err != nil { 324 n.logger.Printf("[ERR] nomad.fsm: UpdateNodeDrain failed: %v", err) 325 return err 326 } 327 return nil 328 } 329 330 func (n *nomadFSM) applyUpsertJob(buf []byte, index uint64) interface{} { 331 defer metrics.MeasureSince([]string{"nomad", "fsm", "register_job"}, time.Now()) 332 var req structs.JobRegisterRequest 333 if err := structs.Decode(buf, &req); err != nil { 334 panic(fmt.Errorf("failed to decode request: %v", err)) 335 } 336 337 /* Handle upgrade paths: 338 * - Empty maps and slices should be treated as nil to avoid 339 * un-intended destructive updates in scheduler since we use 340 * reflect.DeepEqual. Starting Nomad 0.4.1, job submission sanatizes 341 * the incoming job. 342 * - Migrate from old style upgrade stanza that used only a stagger. 343 */ 344 req.Job.Canonicalize() 345 346 if err := n.state.UpsertJob(index, req.Job); err != nil { 347 n.logger.Printf("[ERR] nomad.fsm: UpsertJob failed: %v", err) 348 return err 349 } 350 351 // We always add the job to the periodic dispatcher because there is the 352 // possibility that the periodic spec was removed and then we should stop 353 // tracking it. 354 if err := n.periodicDispatcher.Add(req.Job); err != nil { 355 n.logger.Printf("[ERR] nomad.fsm: periodicDispatcher.Add failed: %v", err) 356 return err 357 } 358 359 // Create a watch set 360 ws := memdb.NewWatchSet() 361 362 // If it is an active periodic job, record the time it was inserted. This is 363 // necessary for recovering during leader election. It is possible that from 364 // the time it is added to when it was suppose to launch, leader election 365 // occurs and the job was not launched. In this case, we use the insertion 366 // time to determine if a launch was missed. 367 if req.Job.IsPeriodicActive() { 368 prevLaunch, err := n.state.PeriodicLaunchByID(ws, req.Namespace, req.Job.ID) 369 if err != nil { 370 n.logger.Printf("[ERR] nomad.fsm: PeriodicLaunchByID failed: %v", err) 371 return err 372 } 373 374 // Record the insertion time as a launch. We overload the launch table 375 // such that the first entry is the insertion time. 376 if prevLaunch == nil { 377 launch := &structs.PeriodicLaunch{ 378 ID: req.Job.ID, 379 Namespace: req.Namespace, 380 Launch: time.Now(), 381 } 382 if err := n.state.UpsertPeriodicLaunch(index, launch); err != nil { 383 n.logger.Printf("[ERR] nomad.fsm: UpsertPeriodicLaunch failed: %v", err) 384 return err 385 } 386 } 387 } 388 389 // Check if the parent job is periodic and mark the launch time. 390 parentID := req.Job.ParentID 391 if parentID != "" { 392 parent, err := n.state.JobByID(ws, req.Namespace, parentID) 393 if err != nil { 394 n.logger.Printf("[ERR] nomad.fsm: JobByID(%v) lookup for parent failed: %v", parentID, err) 395 return err 396 } else if parent == nil { 397 // The parent has been deregistered. 398 return nil 399 } 400 401 if parent.IsPeriodic() && !parent.IsParameterized() { 402 t, err := n.periodicDispatcher.LaunchTime(req.Job.ID) 403 if err != nil { 404 n.logger.Printf("[ERR] nomad.fsm: LaunchTime(%v) failed: %v", req.Job.ID, err) 405 return err 406 } 407 408 launch := &structs.PeriodicLaunch{ 409 ID: parentID, 410 Namespace: req.Namespace, 411 Launch: t, 412 } 413 if err := n.state.UpsertPeriodicLaunch(index, launch); err != nil { 414 n.logger.Printf("[ERR] nomad.fsm: UpsertPeriodicLaunch failed: %v", err) 415 return err 416 } 417 } 418 } 419 420 return nil 421 } 422 423 func (n *nomadFSM) applyDeregisterJob(buf []byte, index uint64) interface{} { 424 defer metrics.MeasureSince([]string{"nomad", "fsm", "deregister_job"}, time.Now()) 425 var req structs.JobDeregisterRequest 426 if err := structs.Decode(buf, &req); err != nil { 427 panic(fmt.Errorf("failed to decode request: %v", err)) 428 } 429 430 // If it is periodic remove it from the dispatcher 431 if err := n.periodicDispatcher.Remove(req.Namespace, req.JobID); err != nil { 432 n.logger.Printf("[ERR] nomad.fsm: periodicDispatcher.Remove failed: %v", err) 433 return err 434 } 435 436 if req.Purge { 437 if err := n.state.DeleteJob(index, req.Namespace, req.JobID); err != nil { 438 n.logger.Printf("[ERR] nomad.fsm: DeleteJob failed: %v", err) 439 return err 440 } 441 442 // We always delete from the periodic launch table because it is possible that 443 // the job was updated to be non-perioidic, thus checking if it is periodic 444 // doesn't ensure we clean it up properly. 445 n.state.DeletePeriodicLaunch(index, req.Namespace, req.JobID) 446 } else { 447 // Get the current job and mark it as stopped and re-insert it. 448 ws := memdb.NewWatchSet() 449 current, err := n.state.JobByID(ws, req.Namespace, req.JobID) 450 if err != nil { 451 n.logger.Printf("[ERR] nomad.fsm: JobByID lookup failed: %v", err) 452 return err 453 } 454 455 if current == nil { 456 return fmt.Errorf("job %q in namespace %q doesn't exist to be deregistered", req.JobID, req.Namespace) 457 } 458 459 stopped := current.Copy() 460 stopped.Stop = true 461 462 if err := n.state.UpsertJob(index, stopped); err != nil { 463 n.logger.Printf("[ERR] nomad.fsm: UpsertJob failed: %v", err) 464 return err 465 } 466 } 467 468 return nil 469 } 470 471 func (n *nomadFSM) applyUpdateEval(buf []byte, index uint64) interface{} { 472 defer metrics.MeasureSince([]string{"nomad", "fsm", "update_eval"}, time.Now()) 473 var req structs.EvalUpdateRequest 474 if err := structs.Decode(buf, &req); err != nil { 475 panic(fmt.Errorf("failed to decode request: %v", err)) 476 } 477 478 if err := n.state.UpsertEvals(index, req.Evals); err != nil { 479 n.logger.Printf("[ERR] nomad.fsm: UpsertEvals failed: %v", err) 480 return err 481 } 482 483 for _, eval := range req.Evals { 484 if eval.ShouldEnqueue() { 485 n.evalBroker.Enqueue(eval) 486 } else if eval.ShouldBlock() { 487 n.blockedEvals.Block(eval) 488 } else if eval.Status == structs.EvalStatusComplete && 489 len(eval.FailedTGAllocs) == 0 { 490 // If we have a successful evaluation for a node, untrack any 491 // blocked evaluation 492 n.blockedEvals.Untrack(eval.JobID) 493 } 494 } 495 return nil 496 } 497 498 func (n *nomadFSM) applyDeleteEval(buf []byte, index uint64) interface{} { 499 defer metrics.MeasureSince([]string{"nomad", "fsm", "delete_eval"}, time.Now()) 500 var req structs.EvalDeleteRequest 501 if err := structs.Decode(buf, &req); err != nil { 502 panic(fmt.Errorf("failed to decode request: %v", err)) 503 } 504 505 if err := n.state.DeleteEval(index, req.Evals, req.Allocs); err != nil { 506 n.logger.Printf("[ERR] nomad.fsm: DeleteEval failed: %v", err) 507 return err 508 } 509 return nil 510 } 511 512 func (n *nomadFSM) applyAllocUpdate(buf []byte, index uint64) interface{} { 513 defer metrics.MeasureSince([]string{"nomad", "fsm", "alloc_update"}, time.Now()) 514 var req structs.AllocUpdateRequest 515 if err := structs.Decode(buf, &req); err != nil { 516 panic(fmt.Errorf("failed to decode request: %v", err)) 517 } 518 519 // Attach the job to all the allocations. It is pulled out in the 520 // payload to avoid the redundancy of encoding, but should be denormalized 521 // prior to being inserted into MemDB. 522 structs.DenormalizeAllocationJobs(req.Job, req.Alloc) 523 524 // Calculate the total resources of allocations. It is pulled out in the 525 // payload to avoid encoding something that can be computed, but should be 526 // denormalized prior to being inserted into MemDB. 527 for _, alloc := range req.Alloc { 528 if alloc.Resources != nil { 529 // COMPAT 0.4.1 -> 0.5 530 // Set the shared resources for allocations which don't have them 531 if alloc.SharedResources == nil { 532 alloc.SharedResources = &structs.Resources{ 533 DiskMB: alloc.Resources.DiskMB, 534 } 535 } 536 537 continue 538 } 539 540 alloc.Resources = new(structs.Resources) 541 for _, task := range alloc.TaskResources { 542 alloc.Resources.Add(task) 543 } 544 545 // Add the shared resources 546 alloc.Resources.Add(alloc.SharedResources) 547 } 548 549 if err := n.state.UpsertAllocs(index, req.Alloc); err != nil { 550 n.logger.Printf("[ERR] nomad.fsm: UpsertAllocs failed: %v", err) 551 return err 552 } 553 return nil 554 } 555 556 func (n *nomadFSM) applyAllocClientUpdate(buf []byte, index uint64) interface{} { 557 defer metrics.MeasureSince([]string{"nomad", "fsm", "alloc_client_update"}, time.Now()) 558 var req structs.AllocUpdateRequest 559 if err := structs.Decode(buf, &req); err != nil { 560 panic(fmt.Errorf("failed to decode request: %v", err)) 561 } 562 if len(req.Alloc) == 0 { 563 return nil 564 } 565 566 // Create a watch set 567 ws := memdb.NewWatchSet() 568 569 // Updating the allocs with the job id and task group name 570 for _, alloc := range req.Alloc { 571 if existing, _ := n.state.AllocByID(ws, alloc.ID); existing != nil { 572 alloc.JobID = existing.JobID 573 alloc.TaskGroup = existing.TaskGroup 574 } 575 } 576 577 // Update all the client allocations 578 if err := n.state.UpdateAllocsFromClient(index, req.Alloc); err != nil { 579 n.logger.Printf("[ERR] nomad.fsm: UpdateAllocFromClient failed: %v", err) 580 return err 581 } 582 583 // Unblock evals for the nodes computed node class if the client has 584 // finished running an allocation. 585 for _, alloc := range req.Alloc { 586 if alloc.ClientStatus == structs.AllocClientStatusComplete || 587 alloc.ClientStatus == structs.AllocClientStatusFailed { 588 nodeID := alloc.NodeID 589 node, err := n.state.NodeByID(ws, nodeID) 590 if err != nil || node == nil { 591 n.logger.Printf("[ERR] nomad.fsm: looking up node %q failed: %v", nodeID, err) 592 return err 593 594 } 595 596 // Unblock any associated quota 597 quota, err := n.allocQuota(alloc.ID) 598 if err != nil { 599 n.logger.Printf("[ERR] nomad.fsm: looking up quota associated with alloc %q failed: %v", alloc.ID, err) 600 return err 601 } 602 603 n.blockedEvals.UnblockClassAndQuota(node.ComputedClass, quota, index) 604 } 605 } 606 607 return nil 608 } 609 610 // applyReconcileSummaries reconciles summaries for all the jobs 611 func (n *nomadFSM) applyReconcileSummaries(buf []byte, index uint64) interface{} { 612 if err := n.state.ReconcileJobSummaries(index); err != nil { 613 return err 614 } 615 return n.reconcileQueuedAllocations(index) 616 } 617 618 // applyUpsertVaultAccessor stores the Vault accessors for a given allocation 619 // and task 620 func (n *nomadFSM) applyUpsertVaultAccessor(buf []byte, index uint64) interface{} { 621 defer metrics.MeasureSince([]string{"nomad", "fsm", "upsert_vault_accessor"}, time.Now()) 622 var req structs.VaultAccessorsRequest 623 if err := structs.Decode(buf, &req); err != nil { 624 panic(fmt.Errorf("failed to decode request: %v", err)) 625 } 626 627 if err := n.state.UpsertVaultAccessor(index, req.Accessors); err != nil { 628 n.logger.Printf("[ERR] nomad.fsm: UpsertVaultAccessor failed: %v", err) 629 return err 630 } 631 632 return nil 633 } 634 635 // applyDeregisterVaultAccessor deregisters a set of Vault accessors 636 func (n *nomadFSM) applyDeregisterVaultAccessor(buf []byte, index uint64) interface{} { 637 defer metrics.MeasureSince([]string{"nomad", "fsm", "deregister_vault_accessor"}, time.Now()) 638 var req structs.VaultAccessorsRequest 639 if err := structs.Decode(buf, &req); err != nil { 640 panic(fmt.Errorf("failed to decode request: %v", err)) 641 } 642 643 if err := n.state.DeleteVaultAccessors(index, req.Accessors); err != nil { 644 n.logger.Printf("[ERR] nomad.fsm: DeregisterVaultAccessor failed: %v", err) 645 return err 646 } 647 648 return nil 649 } 650 651 // applyPlanApply applies the results of a plan application 652 func (n *nomadFSM) applyPlanResults(buf []byte, index uint64) interface{} { 653 defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_plan_results"}, time.Now()) 654 var req structs.ApplyPlanResultsRequest 655 if err := structs.Decode(buf, &req); err != nil { 656 panic(fmt.Errorf("failed to decode request: %v", err)) 657 } 658 659 if err := n.state.UpsertPlanResults(index, &req); err != nil { 660 n.logger.Printf("[ERR] nomad.fsm: ApplyPlan failed: %v", err) 661 return err 662 } 663 664 return nil 665 } 666 667 // applyDeploymentStatusUpdate is used to update the status of an existing 668 // deployment 669 func (n *nomadFSM) applyDeploymentStatusUpdate(buf []byte, index uint64) interface{} { 670 defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_deployment_status_update"}, time.Now()) 671 var req structs.DeploymentStatusUpdateRequest 672 if err := structs.Decode(buf, &req); err != nil { 673 panic(fmt.Errorf("failed to decode request: %v", err)) 674 } 675 676 if err := n.state.UpdateDeploymentStatus(index, &req); err != nil { 677 n.logger.Printf("[ERR] nomad.fsm: UpsertDeploymentStatusUpdate failed: %v", err) 678 return err 679 } 680 681 if req.Eval != nil && req.Eval.ShouldEnqueue() { 682 n.evalBroker.Enqueue(req.Eval) 683 } 684 685 return nil 686 } 687 688 // applyDeploymentPromotion is used to promote canaries in a deployment 689 func (n *nomadFSM) applyDeploymentPromotion(buf []byte, index uint64) interface{} { 690 defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_deployment_promotion"}, time.Now()) 691 var req structs.ApplyDeploymentPromoteRequest 692 if err := structs.Decode(buf, &req); err != nil { 693 panic(fmt.Errorf("failed to decode request: %v", err)) 694 } 695 696 if err := n.state.UpdateDeploymentPromotion(index, &req); err != nil { 697 n.logger.Printf("[ERR] nomad.fsm: UpsertDeploymentPromotion failed: %v", err) 698 return err 699 } 700 701 if req.Eval != nil && req.Eval.ShouldEnqueue() { 702 n.evalBroker.Enqueue(req.Eval) 703 } 704 705 return nil 706 } 707 708 // applyDeploymentAllocHealth is used to set the health of allocations as part 709 // of a deployment 710 func (n *nomadFSM) applyDeploymentAllocHealth(buf []byte, index uint64) interface{} { 711 defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_deployment_alloc_health"}, time.Now()) 712 var req structs.ApplyDeploymentAllocHealthRequest 713 if err := structs.Decode(buf, &req); err != nil { 714 panic(fmt.Errorf("failed to decode request: %v", err)) 715 } 716 717 if err := n.state.UpdateDeploymentAllocHealth(index, &req); err != nil { 718 n.logger.Printf("[ERR] nomad.fsm: UpsertDeploymentAllocHealth failed: %v", err) 719 return err 720 } 721 722 if req.Eval != nil && req.Eval.ShouldEnqueue() { 723 n.evalBroker.Enqueue(req.Eval) 724 } 725 726 return nil 727 } 728 729 // applyDeploymentDelete is used to delete a set of deployments 730 func (n *nomadFSM) applyDeploymentDelete(buf []byte, index uint64) interface{} { 731 defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_deployment_delete"}, time.Now()) 732 var req structs.DeploymentDeleteRequest 733 if err := structs.Decode(buf, &req); err != nil { 734 panic(fmt.Errorf("failed to decode request: %v", err)) 735 } 736 737 if err := n.state.DeleteDeployment(index, req.Deployments); err != nil { 738 n.logger.Printf("[ERR] nomad.fsm: DeleteDeployment failed: %v", err) 739 return err 740 } 741 742 return nil 743 } 744 745 // applyJobStability is used to set the stability of a job 746 func (n *nomadFSM) applyJobStability(buf []byte, index uint64) interface{} { 747 defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_job_stability"}, time.Now()) 748 var req structs.JobStabilityRequest 749 if err := structs.Decode(buf, &req); err != nil { 750 panic(fmt.Errorf("failed to decode request: %v", err)) 751 } 752 753 if err := n.state.UpdateJobStability(index, req.Namespace, req.JobID, req.JobVersion, req.Stable); err != nil { 754 n.logger.Printf("[ERR] nomad.fsm: UpdateJobStability failed: %v", err) 755 return err 756 } 757 758 return nil 759 } 760 761 // applyACLPolicyUpsert is used to upsert a set of policies 762 func (n *nomadFSM) applyACLPolicyUpsert(buf []byte, index uint64) interface{} { 763 defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_acl_policy_upsert"}, time.Now()) 764 var req structs.ACLPolicyUpsertRequest 765 if err := structs.Decode(buf, &req); err != nil { 766 panic(fmt.Errorf("failed to decode request: %v", err)) 767 } 768 769 if err := n.state.UpsertACLPolicies(index, req.Policies); err != nil { 770 n.logger.Printf("[ERR] nomad.fsm: UpsertACLPolicies failed: %v", err) 771 return err 772 } 773 return nil 774 } 775 776 // applyACLPolicyDelete is used to delete a set of policies 777 func (n *nomadFSM) applyACLPolicyDelete(buf []byte, index uint64) interface{} { 778 defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_acl_policy_delete"}, time.Now()) 779 var req structs.ACLPolicyDeleteRequest 780 if err := structs.Decode(buf, &req); err != nil { 781 panic(fmt.Errorf("failed to decode request: %v", err)) 782 } 783 784 if err := n.state.DeleteACLPolicies(index, req.Names); err != nil { 785 n.logger.Printf("[ERR] nomad.fsm: DeleteACLPolicies failed: %v", err) 786 return err 787 } 788 return nil 789 } 790 791 // applyACLTokenUpsert is used to upsert a set of policies 792 func (n *nomadFSM) applyACLTokenUpsert(buf []byte, index uint64) interface{} { 793 defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_acl_token_upsert"}, time.Now()) 794 var req structs.ACLTokenUpsertRequest 795 if err := structs.Decode(buf, &req); err != nil { 796 panic(fmt.Errorf("failed to decode request: %v", err)) 797 } 798 799 if err := n.state.UpsertACLTokens(index, req.Tokens); err != nil { 800 n.logger.Printf("[ERR] nomad.fsm: UpsertACLTokens failed: %v", err) 801 return err 802 } 803 return nil 804 } 805 806 // applyACLTokenDelete is used to delete a set of policies 807 func (n *nomadFSM) applyACLTokenDelete(buf []byte, index uint64) interface{} { 808 defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_acl_token_delete"}, time.Now()) 809 var req structs.ACLTokenDeleteRequest 810 if err := structs.Decode(buf, &req); err != nil { 811 panic(fmt.Errorf("failed to decode request: %v", err)) 812 } 813 814 if err := n.state.DeleteACLTokens(index, req.AccessorIDs); err != nil { 815 n.logger.Printf("[ERR] nomad.fsm: DeleteACLTokens failed: %v", err) 816 return err 817 } 818 return nil 819 } 820 821 // applyACLTokenBootstrap is used to bootstrap an ACL token 822 func (n *nomadFSM) applyACLTokenBootstrap(buf []byte, index uint64) interface{} { 823 defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_acl_token_bootstrap"}, time.Now()) 824 var req structs.ACLTokenBootstrapRequest 825 if err := structs.Decode(buf, &req); err != nil { 826 panic(fmt.Errorf("failed to decode request: %v", err)) 827 } 828 829 if err := n.state.BootstrapACLTokens(index, req.ResetIndex, req.Token); err != nil { 830 n.logger.Printf("[ERR] nomad.fsm: BootstrapACLToken failed: %v", err) 831 return err 832 } 833 return nil 834 } 835 836 func (n *nomadFSM) Snapshot() (raft.FSMSnapshot, error) { 837 // Create a new snapshot 838 snap, err := n.state.Snapshot() 839 if err != nil { 840 return nil, err 841 } 842 843 ns := &nomadSnapshot{ 844 snap: snap, 845 timetable: n.timetable, 846 } 847 return ns, nil 848 } 849 850 func (n *nomadFSM) Restore(old io.ReadCloser) error { 851 defer old.Close() 852 853 // Create a new state store 854 config := &state.StateStoreConfig{ 855 LogOutput: n.config.LogOutput, 856 Region: n.config.Region, 857 } 858 newState, err := state.NewStateStore(config) 859 if err != nil { 860 return err 861 } 862 863 // Start the state restore 864 restore, err := newState.Restore() 865 if err != nil { 866 return err 867 } 868 defer restore.Abort() 869 870 // Create a decoder 871 dec := codec.NewDecoder(old, structs.MsgpackHandle) 872 873 // Read in the header 874 var header snapshotHeader 875 if err := dec.Decode(&header); err != nil { 876 return err 877 } 878 879 // Populate the new state 880 msgType := make([]byte, 1) 881 for { 882 // Read the message type 883 _, err := old.Read(msgType) 884 if err == io.EOF { 885 break 886 } else if err != nil { 887 return err 888 } 889 890 // Decode 891 snapType := SnapshotType(msgType[0]) 892 switch snapType { 893 case TimeTableSnapshot: 894 if err := n.timetable.Deserialize(dec); err != nil { 895 return fmt.Errorf("time table deserialize failed: %v", err) 896 } 897 898 case NodeSnapshot: 899 node := new(structs.Node) 900 if err := dec.Decode(node); err != nil { 901 return err 902 } 903 if err := restore.NodeRestore(node); err != nil { 904 return err 905 } 906 907 case JobSnapshot: 908 job := new(structs.Job) 909 if err := dec.Decode(job); err != nil { 910 return err 911 } 912 913 /* Handle upgrade paths: 914 * - Empty maps and slices should be treated as nil to avoid 915 * un-intended destructive updates in scheduler since we use 916 * reflect.DeepEqual. Starting Nomad 0.4.1, job submission sanatizes 917 * the incoming job. 918 * - Migrate from old style upgrade stanza that used only a stagger. 919 */ 920 job.Canonicalize() 921 922 if err := restore.JobRestore(job); err != nil { 923 return err 924 } 925 926 case EvalSnapshot: 927 eval := new(structs.Evaluation) 928 if err := dec.Decode(eval); err != nil { 929 return err 930 } 931 932 // COMPAT: Handle upgrade to v0.7.0 933 if eval.Namespace == "" { 934 eval.Namespace = structs.DefaultNamespace 935 } 936 937 if err := restore.EvalRestore(eval); err != nil { 938 return err 939 } 940 941 case AllocSnapshot: 942 alloc := new(structs.Allocation) 943 if err := dec.Decode(alloc); err != nil { 944 return err 945 } 946 947 // COMPAT: Handle upgrade to v0.7.0 948 if alloc.Namespace == "" { 949 alloc.Namespace = structs.DefaultNamespace 950 } 951 952 if err := restore.AllocRestore(alloc); err != nil { 953 return err 954 } 955 956 case IndexSnapshot: 957 idx := new(state.IndexEntry) 958 if err := dec.Decode(idx); err != nil { 959 return err 960 } 961 if err := restore.IndexRestore(idx); err != nil { 962 return err 963 } 964 965 case PeriodicLaunchSnapshot: 966 launch := new(structs.PeriodicLaunch) 967 if err := dec.Decode(launch); err != nil { 968 return err 969 } 970 971 // COMPAT: Handle upgrade to v0.7.0 972 if launch.Namespace == "" { 973 launch.Namespace = structs.DefaultNamespace 974 } 975 976 if err := restore.PeriodicLaunchRestore(launch); err != nil { 977 return err 978 } 979 980 case JobSummarySnapshot: 981 summary := new(structs.JobSummary) 982 if err := dec.Decode(summary); err != nil { 983 return err 984 } 985 986 // COMPAT: Handle upgrade to v0.7.0 987 if summary.Namespace == "" { 988 summary.Namespace = structs.DefaultNamespace 989 } 990 991 if err := restore.JobSummaryRestore(summary); err != nil { 992 return err 993 } 994 995 case VaultAccessorSnapshot: 996 accessor := new(structs.VaultAccessor) 997 if err := dec.Decode(accessor); err != nil { 998 return err 999 } 1000 if err := restore.VaultAccessorRestore(accessor); err != nil { 1001 return err 1002 } 1003 1004 case JobVersionSnapshot: 1005 version := new(structs.Job) 1006 if err := dec.Decode(version); err != nil { 1007 return err 1008 } 1009 1010 // COMPAT: Handle upgrade to v0.7.0 1011 if version.Namespace == "" { 1012 version.Namespace = structs.DefaultNamespace 1013 } 1014 1015 if err := restore.JobVersionRestore(version); err != nil { 1016 return err 1017 } 1018 1019 case DeploymentSnapshot: 1020 deployment := new(structs.Deployment) 1021 if err := dec.Decode(deployment); err != nil { 1022 return err 1023 } 1024 1025 // COMPAT: Handle upgrade to v0.7.0 1026 if deployment.Namespace == "" { 1027 deployment.Namespace = structs.DefaultNamespace 1028 } 1029 1030 if err := restore.DeploymentRestore(deployment); err != nil { 1031 return err 1032 } 1033 1034 case ACLPolicySnapshot: 1035 policy := new(structs.ACLPolicy) 1036 if err := dec.Decode(policy); err != nil { 1037 return err 1038 } 1039 if err := restore.ACLPolicyRestore(policy); err != nil { 1040 return err 1041 } 1042 1043 case ACLTokenSnapshot: 1044 token := new(structs.ACLToken) 1045 if err := dec.Decode(token); err != nil { 1046 return err 1047 } 1048 if err := restore.ACLTokenRestore(token); err != nil { 1049 return err 1050 } 1051 1052 default: 1053 // Check if this is an enterprise only object being restored 1054 restorer, ok := n.enterpriseRestorers[snapType] 1055 if !ok { 1056 return fmt.Errorf("Unrecognized snapshot type: %v", msgType) 1057 } 1058 1059 // Restore the enterprise only object 1060 if err := restorer(restore, dec); err != nil { 1061 return err 1062 } 1063 } 1064 } 1065 1066 restore.Commit() 1067 1068 // Create Job Summaries 1069 // COMPAT 0.4 -> 0.4.1 1070 // We can remove this in 0.5. This exists so that the server creates job 1071 // summaries if they were not present previously. When users upgrade to 0.5 1072 // from 0.4.1, the snapshot will contain job summaries so it will be safe to 1073 // remove this block. 1074 index, err := newState.Index("job_summary") 1075 if err != nil { 1076 return fmt.Errorf("couldn't fetch index of job summary table: %v", err) 1077 } 1078 1079 // If the index is 0 that means there is no job summary in the snapshot so 1080 // we will have to create them 1081 if index == 0 { 1082 // query the latest index 1083 latestIndex, err := newState.LatestIndex() 1084 if err != nil { 1085 return fmt.Errorf("unable to query latest index: %v", index) 1086 } 1087 if err := newState.ReconcileJobSummaries(latestIndex); err != nil { 1088 return fmt.Errorf("error reconciling summaries: %v", err) 1089 } 1090 } 1091 1092 // External code might be calling State(), so we need to synchronize 1093 // here to make sure we swap in the new state store atomically. 1094 n.stateLock.Lock() 1095 stateOld := n.state 1096 n.state = newState 1097 n.stateLock.Unlock() 1098 1099 // Signal that the old state store has been abandoned. This is required 1100 // because we don't operate on it any more, we just throw it away, so 1101 // blocking queries won't see any changes and need to be woken up. 1102 stateOld.Abandon() 1103 1104 return nil 1105 } 1106 1107 // reconcileQueuedAllocations re-calculates the queued allocations for every job that we 1108 // created a Job Summary during the snap shot restore 1109 func (n *nomadFSM) reconcileQueuedAllocations(index uint64) error { 1110 // Get all the jobs 1111 ws := memdb.NewWatchSet() 1112 iter, err := n.state.Jobs(ws) 1113 if err != nil { 1114 return err 1115 } 1116 1117 snap, err := n.state.Snapshot() 1118 if err != nil { 1119 return fmt.Errorf("unable to create snapshot: %v", err) 1120 } 1121 1122 // Invoking the scheduler for every job so that we can populate the number 1123 // of queued allocations for every job 1124 for { 1125 rawJob := iter.Next() 1126 if rawJob == nil { 1127 break 1128 } 1129 job := rawJob.(*structs.Job) 1130 planner := &scheduler.Harness{ 1131 State: &snap.StateStore, 1132 } 1133 // Create an eval and mark it as requiring annotations and insert that as well 1134 eval := &structs.Evaluation{ 1135 ID: uuid.Generate(), 1136 Namespace: job.Namespace, 1137 Priority: job.Priority, 1138 Type: job.Type, 1139 TriggeredBy: structs.EvalTriggerJobRegister, 1140 JobID: job.ID, 1141 JobModifyIndex: job.JobModifyIndex + 1, 1142 Status: structs.EvalStatusPending, 1143 AnnotatePlan: true, 1144 } 1145 snap.UpsertEvals(100, []*structs.Evaluation{eval}) 1146 // Create the scheduler and run it 1147 sched, err := scheduler.NewScheduler(eval.Type, n.logger, snap, planner) 1148 if err != nil { 1149 return err 1150 } 1151 1152 if err := sched.Process(eval); err != nil { 1153 return err 1154 } 1155 1156 // Get the job summary from the fsm state store 1157 originalSummary, err := n.state.JobSummaryByID(ws, job.Namespace, job.ID) 1158 if err != nil { 1159 return err 1160 } 1161 summary := originalSummary.Copy() 1162 1163 // Add the allocations scheduler has made to queued since these 1164 // allocations are never getting placed until the scheduler is invoked 1165 // with a real planner 1166 if l := len(planner.Plans); l != 1 { 1167 return fmt.Errorf("unexpected number of plans during restore %d. Please file an issue including the logs", l) 1168 } 1169 for _, allocations := range planner.Plans[0].NodeAllocation { 1170 for _, allocation := range allocations { 1171 tgSummary, ok := summary.Summary[allocation.TaskGroup] 1172 if !ok { 1173 return fmt.Errorf("task group %q not found while updating queued count", allocation.TaskGroup) 1174 } 1175 tgSummary.Queued += 1 1176 summary.Summary[allocation.TaskGroup] = tgSummary 1177 } 1178 } 1179 1180 // Add the queued allocations attached to the evaluation to the queued 1181 // counter of the job summary 1182 if l := len(planner.Evals); l != 1 { 1183 return fmt.Errorf("unexpected number of evals during restore %d. Please file an issue including the logs", l) 1184 } 1185 for tg, queued := range planner.Evals[0].QueuedAllocations { 1186 tgSummary, ok := summary.Summary[tg] 1187 if !ok { 1188 return fmt.Errorf("task group %q not found while updating queued count", tg) 1189 } 1190 1191 // We add instead of setting here because we want to take into 1192 // consideration what the scheduler with a mock planner thinks it 1193 // placed. Those should be counted as queued as well 1194 tgSummary.Queued += queued 1195 summary.Summary[tg] = tgSummary 1196 } 1197 1198 if !reflect.DeepEqual(summary, originalSummary) { 1199 summary.ModifyIndex = index 1200 if err := n.state.UpsertJobSummary(index, summary); err != nil { 1201 return err 1202 } 1203 } 1204 } 1205 return nil 1206 } 1207 1208 func (s *nomadSnapshot) Persist(sink raft.SnapshotSink) error { 1209 defer metrics.MeasureSince([]string{"nomad", "fsm", "persist"}, time.Now()) 1210 // Register the nodes 1211 encoder := codec.NewEncoder(sink, structs.MsgpackHandle) 1212 1213 // Write the header 1214 header := snapshotHeader{} 1215 if err := encoder.Encode(&header); err != nil { 1216 sink.Cancel() 1217 return err 1218 } 1219 1220 // Write the time table 1221 sink.Write([]byte{byte(TimeTableSnapshot)}) 1222 if err := s.timetable.Serialize(encoder); err != nil { 1223 sink.Cancel() 1224 return err 1225 } 1226 1227 // Write all the data out 1228 if err := s.persistIndexes(sink, encoder); err != nil { 1229 sink.Cancel() 1230 return err 1231 } 1232 if err := s.persistNodes(sink, encoder); err != nil { 1233 sink.Cancel() 1234 return err 1235 } 1236 if err := s.persistJobs(sink, encoder); err != nil { 1237 sink.Cancel() 1238 return err 1239 } 1240 if err := s.persistEvals(sink, encoder); err != nil { 1241 sink.Cancel() 1242 return err 1243 } 1244 if err := s.persistAllocs(sink, encoder); err != nil { 1245 sink.Cancel() 1246 return err 1247 } 1248 if err := s.persistPeriodicLaunches(sink, encoder); err != nil { 1249 sink.Cancel() 1250 return err 1251 } 1252 if err := s.persistJobSummaries(sink, encoder); err != nil { 1253 sink.Cancel() 1254 return err 1255 } 1256 if err := s.persistVaultAccessors(sink, encoder); err != nil { 1257 sink.Cancel() 1258 return err 1259 } 1260 if err := s.persistJobVersions(sink, encoder); err != nil { 1261 sink.Cancel() 1262 return err 1263 } 1264 if err := s.persistDeployments(sink, encoder); err != nil { 1265 sink.Cancel() 1266 return err 1267 } 1268 if err := s.persistACLPolicies(sink, encoder); err != nil { 1269 sink.Cancel() 1270 return err 1271 } 1272 if err := s.persistACLTokens(sink, encoder); err != nil { 1273 sink.Cancel() 1274 return err 1275 } 1276 if err := s.persistEnterpriseTables(sink, encoder); err != nil { 1277 sink.Cancel() 1278 return err 1279 } 1280 return nil 1281 } 1282 1283 func (s *nomadSnapshot) persistIndexes(sink raft.SnapshotSink, 1284 encoder *codec.Encoder) error { 1285 // Get all the indexes 1286 iter, err := s.snap.Indexes() 1287 if err != nil { 1288 return err 1289 } 1290 1291 for { 1292 // Get the next item 1293 raw := iter.Next() 1294 if raw == nil { 1295 break 1296 } 1297 1298 // Prepare the request struct 1299 idx := raw.(*state.IndexEntry) 1300 1301 // Write out a node registration 1302 sink.Write([]byte{byte(IndexSnapshot)}) 1303 if err := encoder.Encode(idx); err != nil { 1304 return err 1305 } 1306 } 1307 return nil 1308 } 1309 1310 func (s *nomadSnapshot) persistNodes(sink raft.SnapshotSink, 1311 encoder *codec.Encoder) error { 1312 // Get all the nodes 1313 ws := memdb.NewWatchSet() 1314 nodes, err := s.snap.Nodes(ws) 1315 if err != nil { 1316 return err 1317 } 1318 1319 for { 1320 // Get the next item 1321 raw := nodes.Next() 1322 if raw == nil { 1323 break 1324 } 1325 1326 // Prepare the request struct 1327 node := raw.(*structs.Node) 1328 1329 // Write out a node registration 1330 sink.Write([]byte{byte(NodeSnapshot)}) 1331 if err := encoder.Encode(node); err != nil { 1332 return err 1333 } 1334 } 1335 return nil 1336 } 1337 1338 func (s *nomadSnapshot) persistJobs(sink raft.SnapshotSink, 1339 encoder *codec.Encoder) error { 1340 // Get all the jobs 1341 ws := memdb.NewWatchSet() 1342 jobs, err := s.snap.Jobs(ws) 1343 if err != nil { 1344 return err 1345 } 1346 1347 for { 1348 // Get the next item 1349 raw := jobs.Next() 1350 if raw == nil { 1351 break 1352 } 1353 1354 // Prepare the request struct 1355 job := raw.(*structs.Job) 1356 1357 // Write out a job registration 1358 sink.Write([]byte{byte(JobSnapshot)}) 1359 if err := encoder.Encode(job); err != nil { 1360 return err 1361 } 1362 } 1363 return nil 1364 } 1365 1366 func (s *nomadSnapshot) persistEvals(sink raft.SnapshotSink, 1367 encoder *codec.Encoder) error { 1368 // Get all the evaluations 1369 ws := memdb.NewWatchSet() 1370 evals, err := s.snap.Evals(ws) 1371 if err != nil { 1372 return err 1373 } 1374 1375 for { 1376 // Get the next item 1377 raw := evals.Next() 1378 if raw == nil { 1379 break 1380 } 1381 1382 // Prepare the request struct 1383 eval := raw.(*structs.Evaluation) 1384 1385 // Write out the evaluation 1386 sink.Write([]byte{byte(EvalSnapshot)}) 1387 if err := encoder.Encode(eval); err != nil { 1388 return err 1389 } 1390 } 1391 return nil 1392 } 1393 1394 func (s *nomadSnapshot) persistAllocs(sink raft.SnapshotSink, 1395 encoder *codec.Encoder) error { 1396 // Get all the allocations 1397 ws := memdb.NewWatchSet() 1398 allocs, err := s.snap.Allocs(ws) 1399 if err != nil { 1400 return err 1401 } 1402 1403 for { 1404 // Get the next item 1405 raw := allocs.Next() 1406 if raw == nil { 1407 break 1408 } 1409 1410 // Prepare the request struct 1411 alloc := raw.(*structs.Allocation) 1412 1413 // Write out the evaluation 1414 sink.Write([]byte{byte(AllocSnapshot)}) 1415 if err := encoder.Encode(alloc); err != nil { 1416 return err 1417 } 1418 } 1419 return nil 1420 } 1421 1422 func (s *nomadSnapshot) persistPeriodicLaunches(sink raft.SnapshotSink, 1423 encoder *codec.Encoder) error { 1424 // Get all the jobs 1425 ws := memdb.NewWatchSet() 1426 launches, err := s.snap.PeriodicLaunches(ws) 1427 if err != nil { 1428 return err 1429 } 1430 1431 for { 1432 // Get the next item 1433 raw := launches.Next() 1434 if raw == nil { 1435 break 1436 } 1437 1438 // Prepare the request struct 1439 launch := raw.(*structs.PeriodicLaunch) 1440 1441 // Write out a job registration 1442 sink.Write([]byte{byte(PeriodicLaunchSnapshot)}) 1443 if err := encoder.Encode(launch); err != nil { 1444 return err 1445 } 1446 } 1447 return nil 1448 } 1449 1450 func (s *nomadSnapshot) persistJobSummaries(sink raft.SnapshotSink, 1451 encoder *codec.Encoder) error { 1452 1453 ws := memdb.NewWatchSet() 1454 summaries, err := s.snap.JobSummaries(ws) 1455 if err != nil { 1456 return err 1457 } 1458 1459 for { 1460 raw := summaries.Next() 1461 if raw == nil { 1462 break 1463 } 1464 1465 jobSummary := raw.(*structs.JobSummary) 1466 1467 sink.Write([]byte{byte(JobSummarySnapshot)}) 1468 if err := encoder.Encode(jobSummary); err != nil { 1469 return err 1470 } 1471 } 1472 return nil 1473 } 1474 1475 func (s *nomadSnapshot) persistVaultAccessors(sink raft.SnapshotSink, 1476 encoder *codec.Encoder) error { 1477 1478 ws := memdb.NewWatchSet() 1479 accessors, err := s.snap.VaultAccessors(ws) 1480 if err != nil { 1481 return err 1482 } 1483 1484 for { 1485 raw := accessors.Next() 1486 if raw == nil { 1487 break 1488 } 1489 1490 accessor := raw.(*structs.VaultAccessor) 1491 1492 sink.Write([]byte{byte(VaultAccessorSnapshot)}) 1493 if err := encoder.Encode(accessor); err != nil { 1494 return err 1495 } 1496 } 1497 return nil 1498 } 1499 1500 func (s *nomadSnapshot) persistJobVersions(sink raft.SnapshotSink, 1501 encoder *codec.Encoder) error { 1502 // Get all the jobs 1503 ws := memdb.NewWatchSet() 1504 versions, err := s.snap.JobVersions(ws) 1505 if err != nil { 1506 return err 1507 } 1508 1509 for { 1510 // Get the next item 1511 raw := versions.Next() 1512 if raw == nil { 1513 break 1514 } 1515 1516 // Prepare the request struct 1517 job := raw.(*structs.Job) 1518 1519 // Write out a job registration 1520 sink.Write([]byte{byte(JobVersionSnapshot)}) 1521 if err := encoder.Encode(job); err != nil { 1522 return err 1523 } 1524 } 1525 return nil 1526 } 1527 1528 func (s *nomadSnapshot) persistDeployments(sink raft.SnapshotSink, 1529 encoder *codec.Encoder) error { 1530 // Get all the jobs 1531 ws := memdb.NewWatchSet() 1532 deployments, err := s.snap.Deployments(ws) 1533 if err != nil { 1534 return err 1535 } 1536 1537 for { 1538 // Get the next item 1539 raw := deployments.Next() 1540 if raw == nil { 1541 break 1542 } 1543 1544 // Prepare the request struct 1545 deployment := raw.(*structs.Deployment) 1546 1547 // Write out a job registration 1548 sink.Write([]byte{byte(DeploymentSnapshot)}) 1549 if err := encoder.Encode(deployment); err != nil { 1550 return err 1551 } 1552 } 1553 return nil 1554 } 1555 1556 func (s *nomadSnapshot) persistACLPolicies(sink raft.SnapshotSink, 1557 encoder *codec.Encoder) error { 1558 // Get all the policies 1559 ws := memdb.NewWatchSet() 1560 policies, err := s.snap.ACLPolicies(ws) 1561 if err != nil { 1562 return err 1563 } 1564 1565 for { 1566 // Get the next item 1567 raw := policies.Next() 1568 if raw == nil { 1569 break 1570 } 1571 1572 // Prepare the request struct 1573 policy := raw.(*structs.ACLPolicy) 1574 1575 // Write out a policy registration 1576 sink.Write([]byte{byte(ACLPolicySnapshot)}) 1577 if err := encoder.Encode(policy); err != nil { 1578 return err 1579 } 1580 } 1581 return nil 1582 } 1583 1584 func (s *nomadSnapshot) persistACLTokens(sink raft.SnapshotSink, 1585 encoder *codec.Encoder) error { 1586 // Get all the policies 1587 ws := memdb.NewWatchSet() 1588 tokens, err := s.snap.ACLTokens(ws) 1589 if err != nil { 1590 return err 1591 } 1592 1593 for { 1594 // Get the next item 1595 raw := tokens.Next() 1596 if raw == nil { 1597 break 1598 } 1599 1600 // Prepare the request struct 1601 token := raw.(*structs.ACLToken) 1602 1603 // Write out a token registration 1604 sink.Write([]byte{byte(ACLTokenSnapshot)}) 1605 if err := encoder.Encode(token); err != nil { 1606 return err 1607 } 1608 } 1609 return nil 1610 } 1611 1612 // Release is a no-op, as we just need to GC the pointer 1613 // to the state store snapshot. There is nothing to explicitly 1614 // cleanup. 1615 func (s *nomadSnapshot) Release() {}