github.com/anuvu/nomad@v0.8.7-atom1/nomad/fsm.go (about) 1 package nomad 2 3 import ( 4 "fmt" 5 "io" 6 "log" 7 "reflect" 8 "sync" 9 "time" 10 11 "github.com/armon/go-metrics" 12 memdb "github.com/hashicorp/go-memdb" 13 "github.com/hashicorp/nomad/helper/uuid" 14 "github.com/hashicorp/nomad/nomad/state" 15 "github.com/hashicorp/nomad/nomad/structs" 16 "github.com/hashicorp/nomad/scheduler" 17 "github.com/hashicorp/raft" 18 "github.com/ugorji/go/codec" 19 ) 20 21 const ( 22 // timeTableGranularity is the granularity of index to time tracking 23 timeTableGranularity = 5 * time.Minute 24 25 // timeTableLimit is the maximum limit of our tracking 26 timeTableLimit = 72 * time.Hour 27 ) 28 29 // SnapshotType is prefixed to a record in the FSM snapshot 30 // so that we can determine the type for restore 31 type SnapshotType byte 32 33 const ( 34 NodeSnapshot SnapshotType = iota 35 JobSnapshot 36 IndexSnapshot 37 EvalSnapshot 38 AllocSnapshot 39 TimeTableSnapshot 40 PeriodicLaunchSnapshot 41 JobSummarySnapshot 42 VaultAccessorSnapshot 43 JobVersionSnapshot 44 DeploymentSnapshot 45 ACLPolicySnapshot 46 ACLTokenSnapshot 47 ) 48 49 // LogApplier is the definition of a function that can apply a Raft log 50 type LogApplier func(buf []byte, index uint64) interface{} 51 52 // LogAppliers is a mapping of the Raft MessageType to the appropriate log 53 // applier 54 type LogAppliers map[structs.MessageType]LogApplier 55 56 // SnapshotRestorer is the definition of a function that can apply a Raft log 57 type SnapshotRestorer func(restore *state.StateRestore, dec *codec.Decoder) error 58 59 // SnapshotRestorers is a mapping of the SnapshotType to the appropriate 60 // snapshot restorer. 61 type SnapshotRestorers map[SnapshotType]SnapshotRestorer 62 63 // nomadFSM implements a finite state machine that is used 64 // along with Raft to provide strong consistency. We implement 65 // this outside the Server to avoid exposing this outside the package. 
66 type nomadFSM struct { 67 evalBroker *EvalBroker 68 blockedEvals *BlockedEvals 69 periodicDispatcher *PeriodicDispatch 70 logger *log.Logger 71 state *state.StateStore 72 timetable *TimeTable 73 74 // config is the FSM config 75 config *FSMConfig 76 77 // enterpriseAppliers holds the set of enterprise only LogAppliers 78 enterpriseAppliers LogAppliers 79 80 // enterpriseRestorers holds the set of enterprise only snapshot restorers 81 enterpriseRestorers SnapshotRestorers 82 83 // stateLock is only used to protect outside callers to State() from 84 // racing with Restore(), which is called by Raft (it puts in a totally 85 // new state store). Everything internal here is synchronized by the 86 // Raft side, so doesn't need to lock this. 87 stateLock sync.RWMutex 88 } 89 90 // nomadSnapshot is used to provide a snapshot of the current 91 // state in a way that can be accessed concurrently with operations 92 // that may modify the live state. 93 type nomadSnapshot struct { 94 snap *state.StateSnapshot 95 timetable *TimeTable 96 } 97 98 // snapshotHeader is the first entry in our snapshot 99 type snapshotHeader struct { 100 } 101 102 // FSMConfig is used to configure the FSM 103 type FSMConfig struct { 104 // EvalBroker is the evaluation broker evaluations should be added to 105 EvalBroker *EvalBroker 106 107 // Periodic is the periodic job dispatcher that periodic jobs should be 108 // added/removed from 109 Periodic *PeriodicDispatch 110 111 // BlockedEvals is the blocked eval tracker that blocked evaluations should 112 // be added to. 
113 Blocked *BlockedEvals 114 115 // LogOutput is the writer logs should be written to 116 LogOutput io.Writer 117 118 // Region is the region of the server embedding the FSM 119 Region string 120 } 121 122 // NewFSMPath is used to construct a new FSM with a blank state 123 func NewFSM(config *FSMConfig) (*nomadFSM, error) { 124 // Create a state store 125 sconfig := &state.StateStoreConfig{ 126 LogOutput: config.LogOutput, 127 Region: config.Region, 128 } 129 state, err := state.NewStateStore(sconfig) 130 if err != nil { 131 return nil, err 132 } 133 134 fsm := &nomadFSM{ 135 evalBroker: config.EvalBroker, 136 periodicDispatcher: config.Periodic, 137 blockedEvals: config.Blocked, 138 logger: log.New(config.LogOutput, "", log.LstdFlags), 139 config: config, 140 state: state, 141 timetable: NewTimeTable(timeTableGranularity, timeTableLimit), 142 enterpriseAppliers: make(map[structs.MessageType]LogApplier, 8), 143 enterpriseRestorers: make(map[SnapshotType]SnapshotRestorer, 8), 144 } 145 146 // Register all the log applier functions 147 fsm.registerLogAppliers() 148 149 // Register all the snapshot restorer functions 150 fsm.registerSnapshotRestorers() 151 152 return fsm, nil 153 } 154 155 // Close is used to cleanup resources associated with the FSM 156 func (n *nomadFSM) Close() error { 157 return nil 158 } 159 160 // State is used to return a handle to the current state 161 func (n *nomadFSM) State() *state.StateStore { 162 n.stateLock.RLock() 163 defer n.stateLock.RUnlock() 164 return n.state 165 } 166 167 // TimeTable returns the time table of transactions 168 func (n *nomadFSM) TimeTable() *TimeTable { 169 return n.timetable 170 } 171 172 func (n *nomadFSM) Apply(log *raft.Log) interface{} { 173 buf := log.Data 174 msgType := structs.MessageType(buf[0]) 175 176 // Witness this write 177 n.timetable.Witness(log.Index, time.Now().UTC()) 178 179 // Check if this message type should be ignored when unknown. 
This is 180 // used so that new commands can be added with developer control if older 181 // versions can safely ignore the command, or if they should crash. 182 ignoreUnknown := false 183 if msgType&structs.IgnoreUnknownTypeFlag == structs.IgnoreUnknownTypeFlag { 184 msgType &= ^structs.IgnoreUnknownTypeFlag 185 ignoreUnknown = true 186 } 187 188 switch msgType { 189 case structs.NodeRegisterRequestType: 190 return n.applyUpsertNode(buf[1:], log.Index) 191 case structs.NodeDeregisterRequestType: 192 return n.applyDeregisterNode(buf[1:], log.Index) 193 case structs.NodeUpdateStatusRequestType: 194 return n.applyStatusUpdate(buf[1:], log.Index) 195 case structs.NodeUpdateDrainRequestType: 196 return n.applyDrainUpdate(buf[1:], log.Index) 197 case structs.JobRegisterRequestType: 198 return n.applyUpsertJob(buf[1:], log.Index) 199 case structs.JobDeregisterRequestType: 200 return n.applyDeregisterJob(buf[1:], log.Index) 201 case structs.EvalUpdateRequestType: 202 return n.applyUpdateEval(buf[1:], log.Index) 203 case structs.EvalDeleteRequestType: 204 return n.applyDeleteEval(buf[1:], log.Index) 205 case structs.AllocUpdateRequestType: 206 return n.applyAllocUpdate(buf[1:], log.Index) 207 case structs.AllocClientUpdateRequestType: 208 return n.applyAllocClientUpdate(buf[1:], log.Index) 209 case structs.ReconcileJobSummariesRequestType: 210 return n.applyReconcileSummaries(buf[1:], log.Index) 211 case structs.VaultAccessorRegisterRequestType: 212 return n.applyUpsertVaultAccessor(buf[1:], log.Index) 213 case structs.VaultAccessorDeregisterRequestType: 214 return n.applyDeregisterVaultAccessor(buf[1:], log.Index) 215 case structs.ApplyPlanResultsRequestType: 216 return n.applyPlanResults(buf[1:], log.Index) 217 case structs.DeploymentStatusUpdateRequestType: 218 return n.applyDeploymentStatusUpdate(buf[1:], log.Index) 219 case structs.DeploymentPromoteRequestType: 220 return n.applyDeploymentPromotion(buf[1:], log.Index) 221 case structs.DeploymentAllocHealthRequestType: 
222 return n.applyDeploymentAllocHealth(buf[1:], log.Index) 223 case structs.DeploymentDeleteRequestType: 224 return n.applyDeploymentDelete(buf[1:], log.Index) 225 case structs.JobStabilityRequestType: 226 return n.applyJobStability(buf[1:], log.Index) 227 case structs.ACLPolicyUpsertRequestType: 228 return n.applyACLPolicyUpsert(buf[1:], log.Index) 229 case structs.ACLPolicyDeleteRequestType: 230 return n.applyACLPolicyDelete(buf[1:], log.Index) 231 case structs.ACLTokenUpsertRequestType: 232 return n.applyACLTokenUpsert(buf[1:], log.Index) 233 case structs.ACLTokenDeleteRequestType: 234 return n.applyACLTokenDelete(buf[1:], log.Index) 235 case structs.ACLTokenBootstrapRequestType: 236 return n.applyACLTokenBootstrap(buf[1:], log.Index) 237 case structs.AutopilotRequestType: 238 return n.applyAutopilotUpdate(buf[1:], log.Index) 239 case structs.UpsertNodeEventsType: 240 return n.applyUpsertNodeEvent(buf[1:], log.Index) 241 case structs.JobBatchDeregisterRequestType: 242 return n.applyBatchDeregisterJob(buf[1:], log.Index) 243 case structs.AllocUpdateDesiredTransitionRequestType: 244 return n.applyAllocUpdateDesiredTransition(buf[1:], log.Index) 245 case structs.NodeUpdateEligibilityRequestType: 246 return n.applyNodeEligibilityUpdate(buf[1:], log.Index) 247 case structs.BatchNodeUpdateDrainRequestType: 248 return n.applyBatchDrainUpdate(buf[1:], log.Index) 249 } 250 251 // Check enterprise only message types. 
252 if applier, ok := n.enterpriseAppliers[msgType]; ok { 253 return applier(buf[1:], log.Index) 254 } 255 256 // We didn't match anything, either panic or ignore 257 if ignoreUnknown { 258 n.logger.Printf("[WARN] nomad.fsm: ignoring unknown message type (%d), upgrade to newer version", msgType) 259 return nil 260 } 261 262 panic(fmt.Errorf("failed to apply request: %#v", buf)) 263 } 264 265 func (n *nomadFSM) applyUpsertNode(buf []byte, index uint64) interface{} { 266 defer metrics.MeasureSince([]string{"nomad", "fsm", "register_node"}, time.Now()) 267 var req structs.NodeRegisterRequest 268 if err := structs.Decode(buf, &req); err != nil { 269 panic(fmt.Errorf("failed to decode request: %v", err)) 270 } 271 272 // Handle upgrade paths 273 req.Node.Canonicalize() 274 275 if err := n.state.UpsertNode(index, req.Node); err != nil { 276 n.logger.Printf("[ERR] nomad.fsm: UpsertNode failed: %v", err) 277 return err 278 } 279 280 // Unblock evals for the nodes computed node class if it is in a ready 281 // state. 
282 if req.Node.Status == structs.NodeStatusReady { 283 n.blockedEvals.Unblock(req.Node.ComputedClass, index) 284 } 285 286 return nil 287 } 288 289 func (n *nomadFSM) applyDeregisterNode(buf []byte, index uint64) interface{} { 290 defer metrics.MeasureSince([]string{"nomad", "fsm", "deregister_node"}, time.Now()) 291 var req structs.NodeDeregisterRequest 292 if err := structs.Decode(buf, &req); err != nil { 293 panic(fmt.Errorf("failed to decode request: %v", err)) 294 } 295 296 if err := n.state.DeleteNode(index, req.NodeID); err != nil { 297 n.logger.Printf("[ERR] nomad.fsm: DeleteNode failed: %v", err) 298 return err 299 } 300 return nil 301 } 302 303 func (n *nomadFSM) applyStatusUpdate(buf []byte, index uint64) interface{} { 304 defer metrics.MeasureSince([]string{"nomad", "fsm", "node_status_update"}, time.Now()) 305 var req structs.NodeUpdateStatusRequest 306 if err := structs.Decode(buf, &req); err != nil { 307 panic(fmt.Errorf("failed to decode request: %v", err)) 308 } 309 310 if err := n.state.UpdateNodeStatus(index, req.NodeID, req.Status, req.NodeEvent); err != nil { 311 n.logger.Printf("[ERR] nomad.fsm: UpdateNodeStatus failed: %v", err) 312 return err 313 } 314 315 // Unblock evals for the nodes computed node class if it is in a ready 316 // state. 
317 if req.Status == structs.NodeStatusReady { 318 ws := memdb.NewWatchSet() 319 node, err := n.state.NodeByID(ws, req.NodeID) 320 if err != nil { 321 n.logger.Printf("[ERR] nomad.fsm: looking up node %q failed: %v", req.NodeID, err) 322 return err 323 324 } 325 n.blockedEvals.Unblock(node.ComputedClass, index) 326 } 327 328 return nil 329 } 330 331 func (n *nomadFSM) applyDrainUpdate(buf []byte, index uint64) interface{} { 332 defer metrics.MeasureSince([]string{"nomad", "fsm", "node_drain_update"}, time.Now()) 333 var req structs.NodeUpdateDrainRequest 334 if err := structs.Decode(buf, &req); err != nil { 335 panic(fmt.Errorf("failed to decode request: %v", err)) 336 } 337 338 // COMPAT Remove in version 0.10 339 // As part of Nomad 0.8 we have deprecated the drain boolean in favor of a 340 // drain strategy but we need to handle the upgrade path where the Raft log 341 // contains drain updates with just the drain boolean being manipulated. 342 if req.Drain && req.DrainStrategy == nil { 343 // Mark the drain strategy as a force to imitate the old style drain 344 // functionality. 
345 req.DrainStrategy = &structs.DrainStrategy{ 346 DrainSpec: structs.DrainSpec{ 347 Deadline: -1 * time.Second, 348 }, 349 } 350 } 351 352 if err := n.state.UpdateNodeDrain(index, req.NodeID, req.DrainStrategy, req.MarkEligible, req.NodeEvent); err != nil { 353 n.logger.Printf("[ERR] nomad.fsm: UpdateNodeDrain failed: %v", err) 354 return err 355 } 356 return nil 357 } 358 359 func (n *nomadFSM) applyBatchDrainUpdate(buf []byte, index uint64) interface{} { 360 defer metrics.MeasureSince([]string{"nomad", "fsm", "batch_node_drain_update"}, time.Now()) 361 var req structs.BatchNodeUpdateDrainRequest 362 if err := structs.Decode(buf, &req); err != nil { 363 panic(fmt.Errorf("failed to decode request: %v", err)) 364 } 365 366 if err := n.state.BatchUpdateNodeDrain(index, req.Updates, req.NodeEvents); err != nil { 367 n.logger.Printf("[ERR] nomad.fsm: BatchUpdateNodeDrain failed: %v", err) 368 return err 369 } 370 return nil 371 } 372 373 func (n *nomadFSM) applyNodeEligibilityUpdate(buf []byte, index uint64) interface{} { 374 defer metrics.MeasureSince([]string{"nomad", "fsm", "node_eligibility_update"}, time.Now()) 375 var req structs.NodeUpdateEligibilityRequest 376 if err := structs.Decode(buf, &req); err != nil { 377 panic(fmt.Errorf("failed to decode request: %v", err)) 378 } 379 380 // Lookup the existing node 381 node, err := n.state.NodeByID(nil, req.NodeID) 382 if err != nil { 383 n.logger.Printf("[ERR] nomad.fsm: UpdateNodeEligibility failed to lookup node %q: %v", req.NodeID, err) 384 return err 385 } 386 387 if err := n.state.UpdateNodeEligibility(index, req.NodeID, req.Eligibility, req.NodeEvent); err != nil { 388 n.logger.Printf("[ERR] nomad.fsm: UpdateNodeEligibility failed: %v", err) 389 return err 390 } 391 392 // Unblock evals for the nodes computed node class if it is in a ready 393 // state. 
394 if node != nil && node.SchedulingEligibility == structs.NodeSchedulingIneligible && 395 req.Eligibility == structs.NodeSchedulingEligible { 396 n.blockedEvals.Unblock(node.ComputedClass, index) 397 } 398 399 return nil 400 } 401 402 func (n *nomadFSM) applyUpsertJob(buf []byte, index uint64) interface{} { 403 defer metrics.MeasureSince([]string{"nomad", "fsm", "register_job"}, time.Now()) 404 var req structs.JobRegisterRequest 405 if err := structs.Decode(buf, &req); err != nil { 406 panic(fmt.Errorf("failed to decode request: %v", err)) 407 } 408 409 /* Handle upgrade paths: 410 * - Empty maps and slices should be treated as nil to avoid 411 * un-intended destructive updates in scheduler since we use 412 * reflect.DeepEqual. Starting Nomad 0.4.1, job submission sanitizes 413 * the incoming job. 414 * - Migrate from old style upgrade stanza that used only a stagger. 415 */ 416 req.Job.Canonicalize() 417 418 if err := n.state.UpsertJob(index, req.Job); err != nil { 419 n.logger.Printf("[ERR] nomad.fsm: UpsertJob failed: %v", err) 420 return err 421 } 422 423 // We always add the job to the periodic dispatcher because there is the 424 // possibility that the periodic spec was removed and then we should stop 425 // tracking it. 426 if err := n.periodicDispatcher.Add(req.Job); err != nil { 427 n.logger.Printf("[ERR] nomad.fsm: periodicDispatcher.Add failed: %v", err) 428 return fmt.Errorf("failed adding job to periodic dispatcher: %v", err) 429 } 430 431 // Create a watch set 432 ws := memdb.NewWatchSet() 433 434 // If it is an active periodic job, record the time it was inserted. This is 435 // necessary for recovering during leader election. It is possible that from 436 // the time it is added to when it was suppose to launch, leader election 437 // occurs and the job was not launched. In this case, we use the insertion 438 // time to determine if a launch was missed. 
439 if req.Job.IsPeriodicActive() { 440 prevLaunch, err := n.state.PeriodicLaunchByID(ws, req.Namespace, req.Job.ID) 441 if err != nil { 442 n.logger.Printf("[ERR] nomad.fsm: PeriodicLaunchByID failed: %v", err) 443 return err 444 } 445 446 // Record the insertion time as a launch. We overload the launch table 447 // such that the first entry is the insertion time. 448 if prevLaunch == nil { 449 launch := &structs.PeriodicLaunch{ 450 ID: req.Job.ID, 451 Namespace: req.Namespace, 452 Launch: time.Now(), 453 } 454 if err := n.state.UpsertPeriodicLaunch(index, launch); err != nil { 455 n.logger.Printf("[ERR] nomad.fsm: UpsertPeriodicLaunch failed: %v", err) 456 return err 457 } 458 } 459 } 460 461 // Check if the parent job is periodic and mark the launch time. 462 parentID := req.Job.ParentID 463 if parentID != "" { 464 parent, err := n.state.JobByID(ws, req.Namespace, parentID) 465 if err != nil { 466 n.logger.Printf("[ERR] nomad.fsm: JobByID(%v) lookup for parent failed: %v", parentID, err) 467 return err 468 } else if parent == nil { 469 // The parent has been deregistered. 
470 return nil 471 } 472 473 if parent.IsPeriodic() && !parent.IsParameterized() { 474 t, err := n.periodicDispatcher.LaunchTime(req.Job.ID) 475 if err != nil { 476 n.logger.Printf("[ERR] nomad.fsm: LaunchTime(%v) failed: %v", req.Job.ID, err) 477 return err 478 } 479 480 launch := &structs.PeriodicLaunch{ 481 ID: parentID, 482 Namespace: req.Namespace, 483 Launch: t, 484 } 485 if err := n.state.UpsertPeriodicLaunch(index, launch); err != nil { 486 n.logger.Printf("[ERR] nomad.fsm: UpsertPeriodicLaunch failed: %v", err) 487 return err 488 } 489 } 490 } 491 492 return nil 493 } 494 495 func (n *nomadFSM) applyDeregisterJob(buf []byte, index uint64) interface{} { 496 defer metrics.MeasureSince([]string{"nomad", "fsm", "deregister_job"}, time.Now()) 497 var req structs.JobDeregisterRequest 498 if err := structs.Decode(buf, &req); err != nil { 499 panic(fmt.Errorf("failed to decode request: %v", err)) 500 } 501 502 return n.state.WithWriteTransaction(func(tx state.Txn) error { 503 if err := n.handleJobDeregister(index, req.JobID, req.Namespace, req.Purge, tx); err != nil { 504 n.logger.Printf("[ERR] deregistering job failed:%v", err) 505 return err 506 } 507 508 return nil 509 }) 510 } 511 512 func (n *nomadFSM) applyBatchDeregisterJob(buf []byte, index uint64) interface{} { 513 defer metrics.MeasureSince([]string{"nomad", "fsm", "batch_deregister_job"}, time.Now()) 514 var req structs.JobBatchDeregisterRequest 515 if err := structs.Decode(buf, &req); err != nil { 516 panic(fmt.Errorf("failed to decode request: %v", err)) 517 } 518 519 // Perform all store updates atomically to ensure a consistent views for store readers. 520 // A partial update may increment the snapshot index, allowing eval brokers to process 521 // evals for jobs whose deregistering didn't get committed yet. 
522 err := n.state.WithWriteTransaction(func(tx state.Txn) error { 523 for jobNS, options := range req.Jobs { 524 if err := n.handleJobDeregister(index, jobNS.ID, jobNS.Namespace, options.Purge, tx); err != nil { 525 n.logger.Printf("[ERR] deregistering job %v failed:%v", jobNS, err) 526 return err 527 } 528 } 529 530 if err := n.state.UpsertEvalsTxn(index, req.Evals, tx); err != nil { 531 n.logger.Printf("[ERR] UpsertEvals failed:%v", err) 532 return err 533 } 534 535 return nil 536 }) 537 538 if err != nil { 539 return err 540 } 541 542 // perform the side effects outside the transactions 543 n.handleUpsertedEvals(req.Evals) 544 return nil 545 } 546 547 // handleJobDeregister is used to deregister a job. 548 func (n *nomadFSM) handleJobDeregister(index uint64, jobID, namespace string, purge bool, tx state.Txn) error { 549 // If it is periodic remove it from the dispatcher 550 if err := n.periodicDispatcher.Remove(namespace, jobID); err != nil { 551 n.logger.Printf("[ERR] nomad.fsm: periodicDispatcher.Remove failed: %v", err) 552 return err 553 } 554 555 if purge { 556 if err := n.state.DeleteJobTxn(index, namespace, jobID, tx); err != nil { 557 n.logger.Printf("[ERR] nomad.fsm: DeleteJob failed: %v", err) 558 return err 559 } 560 561 // We always delete from the periodic launch table because it is possible that 562 // the job was updated to be non-periodic, thus checking if it is periodic 563 // doesn't ensure we clean it up properly. 564 n.state.DeletePeriodicLaunchTxn(index, namespace, jobID, tx) 565 } else { 566 // Get the current job and mark it as stopped and re-insert it. 
567 ws := memdb.NewWatchSet() 568 current, err := n.state.JobByIDTxn(ws, namespace, jobID, tx) 569 if err != nil { 570 n.logger.Printf("[ERR] nomad.fsm: JobByID lookup failed: %v", err) 571 return err 572 } 573 574 if current == nil { 575 return fmt.Errorf("job %q in namespace %q doesn't exist to be deregistered", jobID, namespace) 576 } 577 578 stopped := current.Copy() 579 stopped.Stop = true 580 581 if err := n.state.UpsertJobTxn(index, stopped, tx); err != nil { 582 n.logger.Printf("[ERR] nomad.fsm: UpsertJob failed: %v", err) 583 return err 584 } 585 } 586 587 return nil 588 } 589 590 func (n *nomadFSM) applyUpdateEval(buf []byte, index uint64) interface{} { 591 defer metrics.MeasureSince([]string{"nomad", "fsm", "update_eval"}, time.Now()) 592 var req structs.EvalUpdateRequest 593 if err := structs.Decode(buf, &req); err != nil { 594 panic(fmt.Errorf("failed to decode request: %v", err)) 595 } 596 return n.upsertEvals(index, req.Evals) 597 } 598 599 func (n *nomadFSM) upsertEvals(index uint64, evals []*structs.Evaluation) error { 600 if err := n.state.UpsertEvals(index, evals); err != nil { 601 n.logger.Printf("[ERR] nomad.fsm: UpsertEvals failed: %v", err) 602 return err 603 } 604 605 n.handleUpsertedEvals(evals) 606 return nil 607 } 608 609 // handleUpsertingEval is a helper for taking action after upserting 610 // evaluations. 611 func (n *nomadFSM) handleUpsertedEvals(evals []*structs.Evaluation) { 612 for _, eval := range evals { 613 n.handleUpsertedEval(eval) 614 } 615 } 616 617 // handleUpsertingEval is a helper for taking action after upserting an eval. 
618 func (n *nomadFSM) handleUpsertedEval(eval *structs.Evaluation) { 619 if eval == nil { 620 return 621 } 622 623 if eval.ShouldEnqueue() { 624 n.evalBroker.Enqueue(eval) 625 } else if eval.ShouldBlock() { 626 n.blockedEvals.Block(eval) 627 } else if eval.Status == structs.EvalStatusComplete && 628 len(eval.FailedTGAllocs) == 0 { 629 // If we have a successful evaluation for a node, untrack any 630 // blocked evaluation 631 n.blockedEvals.Untrack(eval.JobID, eval.Namespace) 632 } 633 } 634 635 func (n *nomadFSM) applyDeleteEval(buf []byte, index uint64) interface{} { 636 defer metrics.MeasureSince([]string{"nomad", "fsm", "delete_eval"}, time.Now()) 637 var req structs.EvalDeleteRequest 638 if err := structs.Decode(buf, &req); err != nil { 639 panic(fmt.Errorf("failed to decode request: %v", err)) 640 } 641 642 if err := n.state.DeleteEval(index, req.Evals, req.Allocs); err != nil { 643 n.logger.Printf("[ERR] nomad.fsm: DeleteEval failed: %v", err) 644 return err 645 } 646 return nil 647 } 648 649 func (n *nomadFSM) applyAllocUpdate(buf []byte, index uint64) interface{} { 650 defer metrics.MeasureSince([]string{"nomad", "fsm", "alloc_update"}, time.Now()) 651 var req structs.AllocUpdateRequest 652 if err := structs.Decode(buf, &req); err != nil { 653 panic(fmt.Errorf("failed to decode request: %v", err)) 654 } 655 656 // Attach the job to all the allocations. It is pulled out in the 657 // payload to avoid the redundancy of encoding, but should be denormalized 658 // prior to being inserted into MemDB. 659 structs.DenormalizeAllocationJobs(req.Job, req.Alloc) 660 661 // Calculate the total resources of allocations. It is pulled out in the 662 // payload to avoid encoding something that can be computed, but should be 663 // denormalized prior to being inserted into MemDB. 
664 for _, alloc := range req.Alloc { 665 if alloc.Resources != nil { 666 // COMPAT 0.4.1 -> 0.5 667 // Set the shared resources for allocations which don't have them 668 if alloc.SharedResources == nil { 669 alloc.SharedResources = &structs.Resources{ 670 DiskMB: alloc.Resources.DiskMB, 671 } 672 } 673 674 continue 675 } 676 677 alloc.Resources = new(structs.Resources) 678 for _, task := range alloc.TaskResources { 679 alloc.Resources.Add(task) 680 } 681 682 // Add the shared resources 683 alloc.Resources.Add(alloc.SharedResources) 684 } 685 686 if err := n.state.UpsertAllocs(index, req.Alloc); err != nil { 687 n.logger.Printf("[ERR] nomad.fsm: UpsertAllocs failed: %v", err) 688 return err 689 } 690 return nil 691 } 692 693 func (n *nomadFSM) applyAllocClientUpdate(buf []byte, index uint64) interface{} { 694 defer metrics.MeasureSince([]string{"nomad", "fsm", "alloc_client_update"}, time.Now()) 695 var req structs.AllocUpdateRequest 696 if err := structs.Decode(buf, &req); err != nil { 697 panic(fmt.Errorf("failed to decode request: %v", err)) 698 } 699 if len(req.Alloc) == 0 { 700 return nil 701 } 702 703 // Create a watch set 704 ws := memdb.NewWatchSet() 705 706 // Updating the allocs with the job id and task group name 707 for _, alloc := range req.Alloc { 708 if existing, _ := n.state.AllocByID(ws, alloc.ID); existing != nil { 709 alloc.JobID = existing.JobID 710 alloc.TaskGroup = existing.TaskGroup 711 } 712 } 713 714 // Update all the client allocations 715 if err := n.state.UpdateAllocsFromClient(index, req.Alloc); err != nil { 716 n.logger.Printf("[ERR] nomad.fsm: UpdateAllocFromClient failed: %v", err) 717 return err 718 } 719 720 // Update any evals 721 if len(req.Evals) > 0 { 722 if err := n.upsertEvals(index, req.Evals); err != nil { 723 n.logger.Printf("[ERR] nomad.fsm: applyAllocClientUpdate failed to update evaluations: %v", err) 724 return err 725 } 726 } 727 728 // Unblock evals for the nodes computed node class if the client has 729 // finished 
running an allocation. 730 for _, alloc := range req.Alloc { 731 if alloc.ClientStatus == structs.AllocClientStatusComplete || 732 alloc.ClientStatus == structs.AllocClientStatusFailed { 733 nodeID := alloc.NodeID 734 node, err := n.state.NodeByID(ws, nodeID) 735 if err != nil || node == nil { 736 n.logger.Printf("[ERR] nomad.fsm: looking up node %q failed: %v", nodeID, err) 737 return err 738 739 } 740 741 // Unblock any associated quota 742 quota, err := n.allocQuota(alloc.ID) 743 if err != nil { 744 n.logger.Printf("[ERR] nomad.fsm: looking up quota associated with alloc %q failed: %v", alloc.ID, err) 745 return err 746 } 747 748 n.blockedEvals.UnblockClassAndQuota(node.ComputedClass, quota, index) 749 } 750 } 751 752 return nil 753 } 754 755 // applyAllocUpdateDesiredTransition is used to update the desired transitions 756 // of a set of allocations. 757 func (n *nomadFSM) applyAllocUpdateDesiredTransition(buf []byte, index uint64) interface{} { 758 defer metrics.MeasureSince([]string{"nomad", "fsm", "alloc_update_desired_transition"}, time.Now()) 759 var req structs.AllocUpdateDesiredTransitionRequest 760 if err := structs.Decode(buf, &req); err != nil { 761 panic(fmt.Errorf("failed to decode request: %v", err)) 762 } 763 764 if err := n.state.UpdateAllocsDesiredTransitions(index, req.Allocs, req.Evals); err != nil { 765 n.logger.Printf("[ERR] nomad.fsm: UpdateAllocsDesiredTransitions failed: %v", err) 766 return err 767 } 768 769 n.handleUpsertedEvals(req.Evals) 770 return nil 771 } 772 773 // applyReconcileSummaries reconciles summaries for all the jobs 774 func (n *nomadFSM) applyReconcileSummaries(buf []byte, index uint64) interface{} { 775 if err := n.state.ReconcileJobSummaries(index); err != nil { 776 return err 777 } 778 return n.reconcileQueuedAllocations(index) 779 } 780 781 // applyUpsertNodeEvent tracks the given node events. 
782 func (n *nomadFSM) applyUpsertNodeEvent(buf []byte, index uint64) interface{} { 783 defer metrics.MeasureSince([]string{"nomad", "fsm", "upsert_node_events"}, time.Now()) 784 var req structs.EmitNodeEventsRequest 785 if err := structs.Decode(buf, &req); err != nil { 786 n.logger.Printf("[ERR] nomad.fsm: failed to decode EmitNodeEventsRequest: %v", err) 787 return err 788 } 789 790 if err := n.state.UpsertNodeEvents(index, req.NodeEvents); err != nil { 791 n.logger.Printf("[ERR] nomad.fsm: failed to add node events: %v", err) 792 return err 793 } 794 795 return nil 796 } 797 798 // applyUpsertVaultAccessor stores the Vault accessors for a given allocation 799 // and task 800 func (n *nomadFSM) applyUpsertVaultAccessor(buf []byte, index uint64) interface{} { 801 defer metrics.MeasureSince([]string{"nomad", "fsm", "upsert_vault_accessor"}, time.Now()) 802 var req structs.VaultAccessorsRequest 803 if err := structs.Decode(buf, &req); err != nil { 804 panic(fmt.Errorf("failed to decode request: %v", err)) 805 } 806 807 if err := n.state.UpsertVaultAccessor(index, req.Accessors); err != nil { 808 n.logger.Printf("[ERR] nomad.fsm: UpsertVaultAccessor failed: %v", err) 809 return err 810 } 811 812 return nil 813 } 814 815 // applyDeregisterVaultAccessor deregisters a set of Vault accessors 816 func (n *nomadFSM) applyDeregisterVaultAccessor(buf []byte, index uint64) interface{} { 817 defer metrics.MeasureSince([]string{"nomad", "fsm", "deregister_vault_accessor"}, time.Now()) 818 var req structs.VaultAccessorsRequest 819 if err := structs.Decode(buf, &req); err != nil { 820 panic(fmt.Errorf("failed to decode request: %v", err)) 821 } 822 823 if err := n.state.DeleteVaultAccessors(index, req.Accessors); err != nil { 824 n.logger.Printf("[ERR] nomad.fsm: DeregisterVaultAccessor failed: %v", err) 825 return err 826 } 827 828 return nil 829 } 830 831 // applyPlanApply applies the results of a plan application 832 func (n *nomadFSM) applyPlanResults(buf []byte, index 
uint64) interface{} { 833 defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_plan_results"}, time.Now()) 834 var req structs.ApplyPlanResultsRequest 835 if err := structs.Decode(buf, &req); err != nil { 836 panic(fmt.Errorf("failed to decode request: %v", err)) 837 } 838 839 if err := n.state.UpsertPlanResults(index, &req); err != nil { 840 n.logger.Printf("[ERR] nomad.fsm: ApplyPlan failed: %v", err) 841 return err 842 } 843 844 return nil 845 } 846 847 // applyDeploymentStatusUpdate is used to update the status of an existing 848 // deployment 849 func (n *nomadFSM) applyDeploymentStatusUpdate(buf []byte, index uint64) interface{} { 850 defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_deployment_status_update"}, time.Now()) 851 var req structs.DeploymentStatusUpdateRequest 852 if err := structs.Decode(buf, &req); err != nil { 853 panic(fmt.Errorf("failed to decode request: %v", err)) 854 } 855 856 if err := n.state.UpdateDeploymentStatus(index, &req); err != nil { 857 n.logger.Printf("[ERR] nomad.fsm: UpsertDeploymentStatusUpdate failed: %v", err) 858 return err 859 } 860 861 n.handleUpsertedEval(req.Eval) 862 return nil 863 } 864 865 // applyDeploymentPromotion is used to promote canaries in a deployment 866 func (n *nomadFSM) applyDeploymentPromotion(buf []byte, index uint64) interface{} { 867 defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_deployment_promotion"}, time.Now()) 868 var req structs.ApplyDeploymentPromoteRequest 869 if err := structs.Decode(buf, &req); err != nil { 870 panic(fmt.Errorf("failed to decode request: %v", err)) 871 } 872 873 if err := n.state.UpdateDeploymentPromotion(index, &req); err != nil { 874 n.logger.Printf("[ERR] nomad.fsm: UpsertDeploymentPromotion failed: %v", err) 875 return err 876 } 877 878 n.handleUpsertedEval(req.Eval) 879 return nil 880 } 881 882 // applyDeploymentAllocHealth is used to set the health of allocations as part 883 // of a deployment 884 func (n *nomadFSM) 
applyDeploymentAllocHealth(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_deployment_alloc_health"}, time.Now())
	var req structs.ApplyDeploymentAllocHealthRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	if err := n.state.UpdateDeploymentAllocHealth(index, &req); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: UpsertDeploymentAllocHealth failed: %v", err)
		return err
	}

	n.handleUpsertedEval(req.Eval)
	return nil
}

// applyDeploymentDelete is used to delete a set of deployments. It decodes a
// DeploymentDeleteRequest from buf (panicking if the payload cannot be
// decoded) and deletes req.Deployments from the state store at index. The
// state store error is logged and returned; nil is returned on success.
func (n *nomadFSM) applyDeploymentDelete(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_deployment_delete"}, time.Now())
	var req structs.DeploymentDeleteRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	if err := n.state.DeleteDeployment(index, req.Deployments); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: DeleteDeployment failed: %v", err)
		return err
	}

	return nil
}

// applyJobStability is used to set the stability of a job. It decodes a
// JobStabilityRequest from buf (panicking if the payload cannot be decoded)
// and records req.Stable for the given namespace, job ID and job version.
// The state store error is logged and returned; nil is returned on success.
func (n *nomadFSM) applyJobStability(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_job_stability"}, time.Now())
	var req structs.JobStabilityRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	if err := n.state.UpdateJobStability(index, req.Namespace, req.JobID, req.JobVersion, req.Stable); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: UpdateJobStability failed: %v", err)
		return err
	}

	return nil
}

// applyACLPolicyUpsert is used to upsert a set of policies. It decodes an
// ACLPolicyUpsertRequest from buf (panicking if the payload cannot be
// decoded) and upserts req.Policies at index. The state store error is
// logged and returned; nil is returned on success.
func (n *nomadFSM) applyACLPolicyUpsert(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_acl_policy_upsert"}, time.Now())
	var req structs.ACLPolicyUpsertRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	if err := n.state.UpsertACLPolicies(index, req.Policies); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: UpsertACLPolicies failed: %v", err)
		return err
	}
	return nil
}

// applyACLPolicyDelete is used to delete a set of policies. It decodes an
// ACLPolicyDeleteRequest from buf (panicking if the payload cannot be
// decoded) and deletes the policies named in req.Names at index. The state
// store error is logged and returned; nil is returned on success.
func (n *nomadFSM) applyACLPolicyDelete(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_acl_policy_delete"}, time.Now())
	var req structs.ACLPolicyDeleteRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	if err := n.state.DeleteACLPolicies(index, req.Names); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: DeleteACLPolicies failed: %v", err)
		return err
	}
	return nil
}

// applyACLTokenUpsert is used to upsert a set of tokens. It decodes an
// ACLTokenUpsertRequest from buf (panicking if the payload cannot be
// decoded) and upserts req.Tokens at index. The state store error is logged
// and returned; nil is returned on success.
func (n *nomadFSM) applyACLTokenUpsert(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_acl_token_upsert"}, time.Now())
	var req structs.ACLTokenUpsertRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	if err := n.state.UpsertACLTokens(index, req.Tokens); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: UpsertACLTokens failed: %v", err)
		return err
	}
	return nil
}

// applyACLTokenDelete is used to delete a set of tokens. It decodes an
// ACLTokenDeleteRequest from buf (panicking if the payload cannot be
// decoded) and deletes the tokens identified by req.AccessorIDs at index.
// The state store error is logged and returned; nil is returned on success.
func (n *nomadFSM) applyACLTokenDelete(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_acl_token_delete"}, time.Now())
	var req structs.ACLTokenDeleteRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	if err := n.state.DeleteACLTokens(index, req.AccessorIDs); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: DeleteACLTokens failed: %v", err)
		return err
	}
	return nil
}

// applyACLTokenBootstrap is used to bootstrap an ACL token. It decodes an
// ACLTokenBootstrapRequest from buf (panicking if the payload cannot be
// decoded) and passes req.ResetIndex and req.Token to the state store. The
// state store error is logged and returned; nil is returned on success.
func (n *nomadFSM) applyACLTokenBootstrap(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_acl_token_bootstrap"}, time.Now())
	var req structs.ACLTokenBootstrapRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	if err := n.state.BootstrapACLTokens(index, req.ResetIndex, req.Token); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: BootstrapACLToken failed: %v", err)
		return err
	}
	return nil
}

// applyAutopilotUpdate applies an AutopilotSetConfigRequest decoded from buf
// (panicking if the payload cannot be decoded). When req.CAS is set the
// config is written with AutopilotCASConfig using req.Config.ModifyIndex and
// the value reported by that call, or its error, is returned. Otherwise the
// result of AutopilotSetConfig is returned.
func (n *nomadFSM) applyAutopilotUpdate(buf []byte, index uint64) interface{} {
	var req structs.AutopilotSetConfigRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}
	defer metrics.MeasureSince([]string{"nomad", "fsm", "autopilot"}, time.Now())

	if req.CAS {
		act, err := n.state.AutopilotCASConfig(index, req.Config.ModifyIndex, &req.Config)
		if err != nil {
			return err
		}
		return act
	}
	return n.state.AutopilotSetConfig(index, &req.Config)
}

// Snapshot takes a snapshot of the current state store and pairs it with the
// FSM's time table in a nomadSnapshot that can later be persisted.
func (n *nomadFSM) Snapshot() (raft.FSMSnapshot, error) {
	// Create a new snapshot
	snap, err := n.state.Snapshot()
	if err != nil {
		return nil, err
	}

	ns := &nomadSnapshot{
		snap:      snap,
		timetable: n.timetable,
	}
	return ns, nil
}

// Restore rebuilds a brand new state store from the snapshot stream old and
// swaps it in for the FSM's current one. The stream is a snapshotHeader
// followed by records, each made of a one byte SnapshotType and the encoded
// object. Types not handled here are passed to the enterprise restorers. old
// is always closed before returning. The previous state store is abandoned
// once the new one is in place.
func (n *nomadFSM) Restore(old io.ReadCloser) error {
	defer old.Close()

	// Create a new state store
	config := &state.StateStoreConfig{
		LogOutput: n.config.LogOutput,
		Region:    n.config.Region,
	}
	newState, err := state.NewStateStore(config)
	if err != nil {
		return err
	}

	// Start the state restore
	restore, err := newState.Restore()
	if err != nil {
		return err
	}
	defer restore.Abort()

	// Create a decoder
	dec := codec.NewDecoder(old, structs.MsgpackHandle)

	// Read in the header
	var header snapshotHeader
	if err := dec.Decode(&header); err != nil {
		return err
	}

	// Populate the new state
	msgType := make([]byte, 1)
	for {
		// Read the message type. io.ReadFull is used rather than a bare Read
		// because a Reader may return zero bytes with a nil error, or the
		// final byte together with io.EOF; both would otherwise lead to a
		// stale or dropped type byte.
		_, err := io.ReadFull(old, msgType)
		if err == io.EOF {
			break
		} else if err != nil {
			return err
		}

		// Decode
		snapType := SnapshotType(msgType[0])
		switch snapType {
		case TimeTableSnapshot:
			if err := n.timetable.Deserialize(dec); err != nil {
				return fmt.Errorf("time table deserialize failed: %v", err)
			}

		case NodeSnapshot:
			node := new(structs.Node)
			if err := dec.Decode(node); err != nil {
				return err
			}

			// Handle upgrade paths
			node.Canonicalize()

			if err := restore.NodeRestore(node); err != nil {
				return err
			}

		case JobSnapshot:
			job := new(structs.Job)
			if err := dec.Decode(job); err != nil {
				return err
			}

			/* Handle upgrade paths:
			 * - Empty maps and slices should be treated as nil to avoid
			 *   un-intended destructive updates in scheduler since we use
			 *   reflect.DeepEqual. Starting Nomad 0.4.1, job submission sanitizes
			 *   the incoming job.
			 * - Migrate from old style upgrade stanza that used only a stagger.
			 */
			job.Canonicalize()

			if err := restore.JobRestore(job); err != nil {
				return err
			}

		case EvalSnapshot:
			eval := new(structs.Evaluation)
			if err := dec.Decode(eval); err != nil {
				return err
			}

			// COMPAT: Handle upgrade to v0.7.0
			if eval.Namespace == "" {
				eval.Namespace = structs.DefaultNamespace
			}

			if err := restore.EvalRestore(eval); err != nil {
				return err
			}

		case AllocSnapshot:
			alloc := new(structs.Allocation)
			if err := dec.Decode(alloc); err != nil {
				return err
			}

			// COMPAT: Handle upgrade to v0.7.0
			if alloc.Namespace == "" {
				alloc.Namespace = structs.DefaultNamespace
			}

			if err := restore.AllocRestore(alloc); err != nil {
				return err
			}

		case IndexSnapshot:
			idx := new(state.IndexEntry)
			if err := dec.Decode(idx); err != nil {
				return err
			}
			if err := restore.IndexRestore(idx); err != nil {
				return err
			}

		case PeriodicLaunchSnapshot:
			launch := new(structs.PeriodicLaunch)
			if err := dec.Decode(launch); err != nil {
				return err
			}

			// COMPAT: Handle upgrade to v0.7.0
			if launch.Namespace == "" {
				launch.Namespace = structs.DefaultNamespace
			}

			if err := restore.PeriodicLaunchRestore(launch); err != nil {
				return err
			}

		case JobSummarySnapshot:
			summary := new(structs.JobSummary)
			if err := dec.Decode(summary); err != nil {
				return err
			}

			// COMPAT: Handle upgrade to v0.7.0
			if summary.Namespace == "" {
				summary.Namespace = structs.DefaultNamespace
			}

			if err := restore.JobSummaryRestore(summary); err != nil {
				return err
			}

		case VaultAccessorSnapshot:
			accessor := new(structs.VaultAccessor)
			if err := dec.Decode(accessor); err != nil {
				return err
			}
			if err := restore.VaultAccessorRestore(accessor); err != nil {
				return err
			}

		case JobVersionSnapshot:
			version := new(structs.Job)
			if err := dec.Decode(version); err != nil {
				return err
			}

			// COMPAT: Handle upgrade to v0.7.0
			if version.Namespace == "" {
				version.Namespace = structs.DefaultNamespace
			}

			if err := restore.JobVersionRestore(version); err != nil {
				return err
			}

		case DeploymentSnapshot:
			deployment := new(structs.Deployment)
			if err := dec.Decode(deployment); err != nil {
				return err
			}

			// COMPAT: Handle upgrade to v0.7.0
			if deployment.Namespace == "" {
				deployment.Namespace = structs.DefaultNamespace
			}

			if err := restore.DeploymentRestore(deployment); err != nil {
				return err
			}

		case ACLPolicySnapshot:
			policy := new(structs.ACLPolicy)
			if err := dec.Decode(policy); err != nil {
				return err
			}
			if err := restore.ACLPolicyRestore(policy); err != nil {
				return err
			}

		case ACLTokenSnapshot:
			token := new(structs.ACLToken)
			if err := dec.Decode(token); err != nil {
				return err
			}
			if err := restore.ACLTokenRestore(token); err != nil {
				return err
			}

		default:
			// Check if this is an enterprise only object being restored
			restorer, ok := n.enterpriseRestorers[snapType]
			if !ok {
				return fmt.Errorf("Unrecognized snapshot type: %v", msgType)
			}

			// Restore the enterprise only object
			if err := restorer(restore, dec); err != nil {
				return err
			}
		}
	}

	restore.Commit()

	// Create Job Summaries
	// COMPAT 0.4 -> 0.4.1
	// We can remove this in 0.5. This exists so that the server creates job
	// summaries if they were not present previously. When users upgrade to 0.5
	// from 0.4.1, the snapshot will contain job summaries so it will be safe to
	// remove this block.
	index, err := newState.Index("job_summary")
	if err != nil {
		return fmt.Errorf("couldn't fetch index of job summary table: %v", err)
	}

	// If the index is 0 that means there is no job summary in the snapshot so
	// we will have to create them
	if index == 0 {
		// query the latest index
		latestIndex, err := newState.LatestIndex()
		if err != nil {
			// Report the underlying error, not the (always zero) index.
			return fmt.Errorf("unable to query latest index: %v", err)
		}
		if err := newState.ReconcileJobSummaries(latestIndex); err != nil {
			return fmt.Errorf("error reconciling summaries: %v", err)
		}
	}

	// COMPAT Remove in 0.10
	// Clean up active deployments that do not have a job
	if err := n.failLeakedDeployments(newState); err != nil {
		return err
	}

	// External code might be calling State(), so we need to synchronize
	// here to make sure we swap in the new state store atomically.
	n.stateLock.Lock()
	stateOld := n.state
	n.state = newState
	n.stateLock.Unlock()

	// Signal that the old state store has been abandoned. This is required
	// because we don't operate on it any more, we just throw it away, so
	// blocking queries won't see any changes and need to be woken up.
	stateOld.Abandon()

	return nil
}

// failLeakedDeployments is used to fail deployments that do not have a job.
// This state is a broken invariant that should not occur since 0.8.X.
// Every active deployment whose job cannot be found in the given state store
// is upserted as a copy marked cancelled, at the current index of the
// deployment table.
func (n *nomadFSM) failLeakedDeployments(state *state.StateStore) error {
	// Scan for deployments that are referencing a job that no longer exists.
	// This could happen if multiple deployments were created for a given job
	// and thus the older deployment leaks and then the job is removed.
	iter, err := state.Deployments(nil)
	if err != nil {
		return fmt.Errorf("failed to query deployments: %v", err)
	}

	dindex, err := state.Index("deployment")
	if err != nil {
		return fmt.Errorf("couldn't fetch index of deployments table: %v", err)
	}

	for {
		raw := iter.Next()
		if raw == nil {
			break
		}

		d := raw.(*structs.Deployment)

		// We are only looking for active deployments where the job no longer
		// exists
		if !d.Active() {
			continue
		}

		// Find the job
		job, err := state.JobByID(nil, d.Namespace, d.JobID)
		if err != nil {
			return fmt.Errorf("failed to lookup job %s from deployment %q: %v", d.JobID, d.ID, err)
		}

		// Job exists.
		if job != nil {
			continue
		}

		// Update the deployment to be terminal
		failed := d.Copy()
		failed.Status = structs.DeploymentStatusCancelled
		failed.StatusDescription = structs.DeploymentStatusDescriptionStoppedJob
		if err := state.UpsertDeployment(dindex, failed); err != nil {
			return fmt.Errorf("failed to mark leaked deployment %q as failed: %v", failed.ID, err)
		}
	}

	return nil
}

// reconcileQueuedAllocations re-calculates the queued allocations for every job that we
// created a Job Summary during the snap shot restore. For each job it runs
// the scheduler against a snapshot with a scheduler.Harness as the planner,
// counts the planned and queued allocations per task group, and upserts the
// job summary at index when the counts changed.
func (n *nomadFSM) reconcileQueuedAllocations(index uint64) error {
	// Get all the jobs
	ws := memdb.NewWatchSet()
	iter, err := n.state.Jobs(ws)
	if err != nil {
		return err
	}

	snap, err := n.state.Snapshot()
	if err != nil {
		return fmt.Errorf("unable to create snapshot: %v", err)
	}

	// Invoking the scheduler for every job so that we can populate the number
	// of queued allocations for every job
	for {
		rawJob := iter.Next()
		if rawJob == nil {
			break
		}
		job := rawJob.(*structs.Job)
		planner := &scheduler.Harness{
			State: &snap.StateStore,
		}
		// Create an eval and mark it as requiring annotations and insert that as well
		eval := &structs.Evaluation{
			ID:             uuid.Generate(),
			Namespace:      job.Namespace,
			Priority:       job.Priority,
			Type:           job.Type,
			TriggeredBy:    structs.EvalTriggerJobRegister,
			JobID:          job.ID,
			JobModifyIndex: job.JobModifyIndex + 1,
			Status:         structs.EvalStatusPending,
			AnnotatePlan:   true,
		}
		// Do not run the scheduler against an eval that failed to be stored.
		if err := snap.UpsertEvals(100, []*structs.Evaluation{eval}); err != nil {
			return err
		}
		// Create the scheduler and run it
		sched, err := scheduler.NewScheduler(eval.Type, n.logger, snap, planner)
		if err != nil {
			return err
		}

		if err := sched.Process(eval); err != nil {
			return err
		}

		// Get the job summary from the fsm state store
		originalSummary, err := n.state.JobSummaryByID(ws, job.Namespace, job.ID)
		if err != nil {
			return err
		}
		summary := originalSummary.Copy()

		// Add the allocations scheduler has made to queued since these
		// allocations are never getting placed until the scheduler is invoked
		// with a real planner
		if l := len(planner.Plans); l != 1 {
			return fmt.Errorf("unexpected number of plans during restore %d. Please file an issue including the logs", l)
		}
		for _, allocations := range planner.Plans[0].NodeAllocation {
			for _, allocation := range allocations {
				tgSummary, ok := summary.Summary[allocation.TaskGroup]
				if !ok {
					return fmt.Errorf("task group %q not found while updating queued count", allocation.TaskGroup)
				}
				tgSummary.Queued += 1
				summary.Summary[allocation.TaskGroup] = tgSummary
			}
		}

		// Add the queued allocations attached to the evaluation to the queued
		// counter of the job summary
		if l := len(planner.Evals); l != 1 {
			return fmt.Errorf("unexpected number of evals during restore %d. Please file an issue including the logs", l)
		}
		for tg, queued := range planner.Evals[0].QueuedAllocations {
			tgSummary, ok := summary.Summary[tg]
			if !ok {
				return fmt.Errorf("task group %q not found while updating queued count", tg)
			}

			// We add instead of setting here because we want to take into
			// consideration what the scheduler with a mock planner thinks it
			// placed. Those should be counted as queued as well
			tgSummary.Queued += queued
			summary.Summary[tg] = tgSummary
		}

		if !reflect.DeepEqual(summary, originalSummary) {
			summary.ModifyIndex = index
			if err := n.state.UpsertJobSummary(index, summary); err != nil {
				return err
			}
		}
	}
	return nil
}

// Persist writes the snapshot to sink: the header, the time table, and then
// every table in turn. On the first error the sink is cancelled and the
// error is returned.
func (s *nomadSnapshot) Persist(sink raft.SnapshotSink) error {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "persist"}, time.Now())
	// Register the nodes
	encoder := codec.NewEncoder(sink, structs.MsgpackHandle)

	// Write the header
	header := snapshotHeader{}
	if err := encoder.Encode(&header); err != nil {
		sink.Cancel()
		return err
	}

	// Write the time table
	if _, err := sink.Write([]byte{byte(TimeTableSnapshot)}); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.timetable.Serialize(encoder); err != nil {
		sink.Cancel()
		return err
	}

	// Write all the data out
	if err := s.persistIndexes(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistNodes(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistJobs(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistEvals(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistAllocs(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistPeriodicLaunches(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistJobSummaries(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistVaultAccessors(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistJobVersions(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistDeployments(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistACLPolicies(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistACLTokens(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistEnterpriseTables(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	return nil
}

// persistIndexes writes every index entry in the snapshot to sink, each one
// prefixed with the IndexSnapshot type byte. It returns the first write or
// encode error.
func (s *nomadSnapshot) persistIndexes(sink raft.SnapshotSink,
	encoder *codec.Encoder) error {
	// Get all the indexes
	iter, err := s.snap.Indexes()
	if err != nil {
		return err
	}

	for {
		// Get the next item
		raw := iter.Next()
		if raw == nil {
			break
		}

		// Prepare the request struct
		idx := raw.(*state.IndexEntry)

		// Write out the index entry
		if _, err := sink.Write([]byte{byte(IndexSnapshot)}); err != nil {
			return err
		}
		if err := encoder.Encode(idx); err != nil {
			return err
		}
	}
	return nil
}

// persistNodes writes every node in the snapshot to sink, each one prefixed
// with the NodeSnapshot type byte. It returns the first write or encode
// error.
func (s *nomadSnapshot) persistNodes(sink raft.SnapshotSink,
	encoder *codec.Encoder) error {
	// Get all the nodes
	ws := memdb.NewWatchSet()
	nodes, err := s.snap.Nodes(ws)
	if err != nil {
		return err
	}

	for {
		// Get the next item
		raw := nodes.Next()
		if raw == nil {
			break
		}

		// Prepare the request struct
		node := raw.(*structs.Node)

		// Write out a node registration
		if _, err := sink.Write([]byte{byte(NodeSnapshot)}); err != nil {
			return err
		}
		if err := encoder.Encode(node); err != nil {
			return err
		}
	}
	return nil
}

func (s *nomadSnapshot) persistJobs(sink raft.SnapshotSink, 1590 encoder *codec.Encoder) error { 1591 // Get all the jobs 1592 ws := memdb.NewWatchSet() 1593 jobs, err := s.snap.Jobs(ws) 1594 if err != nil { 1595 return err 1596 } 1597 1598 for { 1599 // Get the next item 1600 raw := jobs.Next() 1601 if raw == nil { 1602 break 1603 } 1604 1605 // Prepare the request struct 1606 job := raw.(*structs.Job) 1607 1608 // Write out a job registration 1609 sink.Write([]byte{byte(JobSnapshot)}) 1610 if err := encoder.Encode(job); err != nil { 1611 return err 1612 } 1613 } 1614 return nil 1615 } 1616 1617 func (s *nomadSnapshot) persistEvals(sink raft.SnapshotSink, 1618 encoder *codec.Encoder) error { 1619 // Get all the evaluations 1620 ws := memdb.NewWatchSet() 1621 evals, err := s.snap.Evals(ws) 1622 if err != nil { 1623 return err 1624 } 1625 1626 for { 1627 // Get the next item 1628 raw := evals.Next() 1629 if raw == nil { 1630 break 1631 } 1632 1633 // Prepare the request struct 1634 eval := raw.(*structs.Evaluation) 1635 1636 // Write out the evaluation 1637 sink.Write([]byte{byte(EvalSnapshot)}) 1638 if err := encoder.Encode(eval); err != nil { 1639 return err 1640 } 1641 } 1642 return nil 1643 } 1644 1645 func (s *nomadSnapshot) persistAllocs(sink raft.SnapshotSink, 1646 encoder *codec.Encoder) error { 1647 // Get all the allocations 1648 ws := memdb.NewWatchSet() 1649 allocs, err := s.snap.Allocs(ws) 1650 if err != nil { 1651 return err 1652 } 1653 1654 for { 1655 // Get the next item 1656 raw := allocs.Next() 1657 if raw == nil { 1658 break 1659 } 1660 1661 // Prepare the request struct 1662 alloc := raw.(*structs.Allocation) 1663 1664 // Write out the evaluation 1665 sink.Write([]byte{byte(AllocSnapshot)}) 1666 if err := encoder.Encode(alloc); err != nil { 1667 return err 1668 } 1669 } 1670 return nil 1671 } 1672 1673 func (s *nomadSnapshot) persistPeriodicLaunches(sink raft.SnapshotSink, 1674 encoder *codec.Encoder) error { 1675 // Get all the jobs 1676 ws := 
memdb.NewWatchSet() 1677 launches, err := s.snap.PeriodicLaunches(ws) 1678 if err != nil { 1679 return err 1680 } 1681 1682 for { 1683 // Get the next item 1684 raw := launches.Next() 1685 if raw == nil { 1686 break 1687 } 1688 1689 // Prepare the request struct 1690 launch := raw.(*structs.PeriodicLaunch) 1691 1692 // Write out a job registration 1693 sink.Write([]byte{byte(PeriodicLaunchSnapshot)}) 1694 if err := encoder.Encode(launch); err != nil { 1695 return err 1696 } 1697 } 1698 return nil 1699 } 1700 1701 func (s *nomadSnapshot) persistJobSummaries(sink raft.SnapshotSink, 1702 encoder *codec.Encoder) error { 1703 1704 ws := memdb.NewWatchSet() 1705 summaries, err := s.snap.JobSummaries(ws) 1706 if err != nil { 1707 return err 1708 } 1709 1710 for { 1711 raw := summaries.Next() 1712 if raw == nil { 1713 break 1714 } 1715 1716 jobSummary := raw.(*structs.JobSummary) 1717 1718 sink.Write([]byte{byte(JobSummarySnapshot)}) 1719 if err := encoder.Encode(jobSummary); err != nil { 1720 return err 1721 } 1722 } 1723 return nil 1724 } 1725 1726 func (s *nomadSnapshot) persistVaultAccessors(sink raft.SnapshotSink, 1727 encoder *codec.Encoder) error { 1728 1729 ws := memdb.NewWatchSet() 1730 accessors, err := s.snap.VaultAccessors(ws) 1731 if err != nil { 1732 return err 1733 } 1734 1735 for { 1736 raw := accessors.Next() 1737 if raw == nil { 1738 break 1739 } 1740 1741 accessor := raw.(*structs.VaultAccessor) 1742 1743 sink.Write([]byte{byte(VaultAccessorSnapshot)}) 1744 if err := encoder.Encode(accessor); err != nil { 1745 return err 1746 } 1747 } 1748 return nil 1749 } 1750 1751 func (s *nomadSnapshot) persistJobVersions(sink raft.SnapshotSink, 1752 encoder *codec.Encoder) error { 1753 // Get all the jobs 1754 ws := memdb.NewWatchSet() 1755 versions, err := s.snap.JobVersions(ws) 1756 if err != nil { 1757 return err 1758 } 1759 1760 for { 1761 // Get the next item 1762 raw := versions.Next() 1763 if raw == nil { 1764 break 1765 } 1766 1767 // Prepare the request 
struct 1768 job := raw.(*structs.Job) 1769 1770 // Write out a job registration 1771 sink.Write([]byte{byte(JobVersionSnapshot)}) 1772 if err := encoder.Encode(job); err != nil { 1773 return err 1774 } 1775 } 1776 return nil 1777 } 1778 1779 func (s *nomadSnapshot) persistDeployments(sink raft.SnapshotSink, 1780 encoder *codec.Encoder) error { 1781 // Get all the jobs 1782 ws := memdb.NewWatchSet() 1783 deployments, err := s.snap.Deployments(ws) 1784 if err != nil { 1785 return err 1786 } 1787 1788 for { 1789 // Get the next item 1790 raw := deployments.Next() 1791 if raw == nil { 1792 break 1793 } 1794 1795 // Prepare the request struct 1796 deployment := raw.(*structs.Deployment) 1797 1798 // Write out a job registration 1799 sink.Write([]byte{byte(DeploymentSnapshot)}) 1800 if err := encoder.Encode(deployment); err != nil { 1801 return err 1802 } 1803 } 1804 return nil 1805 } 1806 1807 func (s *nomadSnapshot) persistACLPolicies(sink raft.SnapshotSink, 1808 encoder *codec.Encoder) error { 1809 // Get all the policies 1810 ws := memdb.NewWatchSet() 1811 policies, err := s.snap.ACLPolicies(ws) 1812 if err != nil { 1813 return err 1814 } 1815 1816 for { 1817 // Get the next item 1818 raw := policies.Next() 1819 if raw == nil { 1820 break 1821 } 1822 1823 // Prepare the request struct 1824 policy := raw.(*structs.ACLPolicy) 1825 1826 // Write out a policy registration 1827 sink.Write([]byte{byte(ACLPolicySnapshot)}) 1828 if err := encoder.Encode(policy); err != nil { 1829 return err 1830 } 1831 } 1832 return nil 1833 } 1834 1835 func (s *nomadSnapshot) persistACLTokens(sink raft.SnapshotSink, 1836 encoder *codec.Encoder) error { 1837 // Get all the policies 1838 ws := memdb.NewWatchSet() 1839 tokens, err := s.snap.ACLTokens(ws) 1840 if err != nil { 1841 return err 1842 } 1843 1844 for { 1845 // Get the next item 1846 raw := tokens.Next() 1847 if raw == nil { 1848 break 1849 } 1850 1851 // Prepare the request struct 1852 token := raw.(*structs.ACLToken) 1853 1854 // 
Write out a token registration 1855 sink.Write([]byte{byte(ACLTokenSnapshot)}) 1856 if err := encoder.Encode(token); err != nil { 1857 return err 1858 } 1859 } 1860 return nil 1861 } 1862 1863 // Release is a no-op, as we just need to GC the pointer 1864 // to the state store snapshot. There is nothing to explicitly 1865 // cleanup. 1866 func (s *nomadSnapshot) Release() {}