github.com/jrxfive/nomad@v0.6.1-0.20170802162750-1fef470e89bf/nomad/fsm.go (about) 1 package nomad 2 3 import ( 4 "fmt" 5 "io" 6 "log" 7 "reflect" 8 "sync" 9 "time" 10 11 "github.com/armon/go-metrics" 12 memdb "github.com/hashicorp/go-memdb" 13 "github.com/hashicorp/nomad/nomad/state" 14 "github.com/hashicorp/nomad/nomad/structs" 15 "github.com/hashicorp/nomad/scheduler" 16 "github.com/hashicorp/raft" 17 "github.com/ugorji/go/codec" 18 ) 19 20 const ( 21 // timeTableGranularity is the granularity of index to time tracking 22 timeTableGranularity = 5 * time.Minute 23 24 // timeTableLimit is the maximum limit of our tracking 25 timeTableLimit = 72 * time.Hour 26 ) 27 28 // SnapshotType is prefixed to a record in the FSM snapshot 29 // so that we can determine the type for restore 30 type SnapshotType byte 31 32 const ( 33 NodeSnapshot SnapshotType = iota 34 JobSnapshot 35 IndexSnapshot 36 EvalSnapshot 37 AllocSnapshot 38 TimeTableSnapshot 39 PeriodicLaunchSnapshot 40 JobSummarySnapshot 41 VaultAccessorSnapshot 42 JobVersionSnapshot 43 DeploymentSnapshot 44 ) 45 46 // nomadFSM implements a finite state machine that is used 47 // along with Raft to provide strong consistency. We implement 48 // this outside the Server to avoid exposing this outside the package. 49 type nomadFSM struct { 50 evalBroker *EvalBroker 51 blockedEvals *BlockedEvals 52 periodicDispatcher *PeriodicDispatch 53 logOutput io.Writer 54 logger *log.Logger 55 state *state.StateStore 56 timetable *TimeTable 57 58 // stateLock is only used to protect outside callers to State() from 59 // racing with Restore(), which is called by Raft (it puts in a totally 60 // new state store). Everything internal here is synchronized by the 61 // Raft side, so doesn't need to lock this. 62 stateLock sync.RWMutex 63 } 64 65 // nomadSnapshot is used to provide a snapshot of the current 66 // state in a way that can be accessed concurrently with operations 67 // that may modify the live state. 68 type nomadSnapshot struct { 69 snap *state.StateSnapshot 70 timetable *TimeTable 71 } 72 73 // snapshotHeader is the first entry in our snapshot 74 type snapshotHeader struct { 75 } 76 77 // NewFSMPath is used to construct a new FSM with a blank state 78 func NewFSM(evalBroker *EvalBroker, periodic *PeriodicDispatch, 79 blocked *BlockedEvals, logOutput io.Writer) (*nomadFSM, error) { 80 // Create a state store 81 state, err := state.NewStateStore(logOutput) 82 if err != nil { 83 return nil, err 84 } 85 86 fsm := &nomadFSM{ 87 evalBroker: evalBroker, 88 periodicDispatcher: periodic, 89 blockedEvals: blocked, 90 logOutput: logOutput, 91 logger: log.New(logOutput, "", log.LstdFlags), 92 state: state, 93 timetable: NewTimeTable(timeTableGranularity, timeTableLimit), 94 } 95 return fsm, nil 96 } 97 98 // Close is used to cleanup resources associated with the FSM 99 func (n *nomadFSM) Close() error { 100 return nil 101 } 102 103 // State is used to return a handle to the current state 104 func (n *nomadFSM) State() *state.StateStore { 105 n.stateLock.RLock() 106 defer n.stateLock.RUnlock() 107 return n.state 108 } 109 110 // TimeTable returns the time table of transactions 111 func (n *nomadFSM) TimeTable() *TimeTable { 112 return n.timetable 113 } 114 115 func (n *nomadFSM) Apply(log *raft.Log) interface{} { 116 buf := log.Data 117 msgType := structs.MessageType(buf[0]) 118 119 // Witness this write 120 n.timetable.Witness(log.Index, time.Now().UTC()) 121 122 // Check if this message type should be ignored when unknown. This is 123 // used so that new commands can be added with developer control if older 124 // versions can safely ignore the command, or if they should crash. 125 ignoreUnknown := false 126 if msgType&structs.IgnoreUnknownTypeFlag == structs.IgnoreUnknownTypeFlag { 127 msgType &= ^structs.IgnoreUnknownTypeFlag 128 ignoreUnknown = true 129 } 130 131 switch msgType { 132 case structs.NodeRegisterRequestType: 133 return n.applyUpsertNode(buf[1:], log.Index) 134 case structs.NodeDeregisterRequestType: 135 return n.applyDeregisterNode(buf[1:], log.Index) 136 case structs.NodeUpdateStatusRequestType: 137 return n.applyStatusUpdate(buf[1:], log.Index) 138 case structs.NodeUpdateDrainRequestType: 139 return n.applyDrainUpdate(buf[1:], log.Index) 140 case structs.JobRegisterRequestType: 141 return n.applyUpsertJob(buf[1:], log.Index) 142 case structs.JobDeregisterRequestType: 143 return n.applyDeregisterJob(buf[1:], log.Index) 144 case structs.EvalUpdateRequestType: 145 return n.applyUpdateEval(buf[1:], log.Index) 146 case structs.EvalDeleteRequestType: 147 return n.applyDeleteEval(buf[1:], log.Index) 148 case structs.AllocUpdateRequestType: 149 return n.applyAllocUpdate(buf[1:], log.Index) 150 case structs.AllocClientUpdateRequestType: 151 return n.applyAllocClientUpdate(buf[1:], log.Index) 152 case structs.ReconcileJobSummariesRequestType: 153 return n.applyReconcileSummaries(buf[1:], log.Index) 154 case structs.VaultAccessorRegisterRequestType: 155 return n.applyUpsertVaultAccessor(buf[1:], log.Index) 156 case structs.VaultAccessorDegisterRequestType: 157 return n.applyDeregisterVaultAccessor(buf[1:], log.Index) 158 case structs.ApplyPlanResultsRequestType: 159 return n.applyPlanResults(buf[1:], log.Index) 160 case structs.DeploymentStatusUpdateRequestType: 161 return n.applyDeploymentStatusUpdate(buf[1:], log.Index) 162 case structs.DeploymentPromoteRequestType: 163 return n.applyDeploymentPromotion(buf[1:], log.Index) 164 case structs.DeploymentAllocHealthRequestType: 165 return n.applyDeploymentAllocHealth(buf[1:], log.Index) 166 case structs.DeploymentDeleteRequestType: 167 return n.applyDeploymentDelete(buf[1:], log.Index) 168 case structs.JobStabilityRequestType: 169 return n.applyJobStability(buf[1:], log.Index) 170 default: 171 if ignoreUnknown { 172 n.logger.Printf("[WARN] nomad.fsm: ignoring unknown message type (%d), upgrade to newer version", msgType) 173 return nil 174 } else { 175 panic(fmt.Errorf("failed to apply request: %#v", buf)) 176 } 177 } 178 } 179 180 func (n *nomadFSM) applyUpsertNode(buf []byte, index uint64) interface{} { 181 defer metrics.MeasureSince([]string{"nomad", "fsm", "register_node"}, time.Now()) 182 var req structs.NodeRegisterRequest 183 if err := structs.Decode(buf, &req); err != nil { 184 panic(fmt.Errorf("failed to decode request: %v", err)) 185 } 186 187 if err := n.state.UpsertNode(index, req.Node); err != nil { 188 n.logger.Printf("[ERR] nomad.fsm: UpsertNode failed: %v", err) 189 return err 190 } 191 192 // Unblock evals for the nodes computed node class if it is in a ready 193 // state. 194 if req.Node.Status == structs.NodeStatusReady { 195 n.blockedEvals.Unblock(req.Node.ComputedClass, index) 196 } 197 198 return nil 199 } 200 201 func (n *nomadFSM) applyDeregisterNode(buf []byte, index uint64) interface{} { 202 defer metrics.MeasureSince([]string{"nomad", "fsm", "deregister_node"}, time.Now()) 203 var req structs.NodeDeregisterRequest 204 if err := structs.Decode(buf, &req); err != nil { 205 panic(fmt.Errorf("failed to decode request: %v", err)) 206 } 207 208 if err := n.state.DeleteNode(index, req.NodeID); err != nil { 209 n.logger.Printf("[ERR] nomad.fsm: DeleteNode failed: %v", err) 210 return err 211 } 212 return nil 213 } 214 215 func (n *nomadFSM) applyStatusUpdate(buf []byte, index uint64) interface{} { 216 defer metrics.MeasureSince([]string{"nomad", "fsm", "node_status_update"}, time.Now()) 217 var req structs.NodeUpdateStatusRequest 218 if err := structs.Decode(buf, &req); err != nil { 219 panic(fmt.Errorf("failed to decode request: %v", err)) 220 } 221 222 if err := n.state.UpdateNodeStatus(index, req.NodeID, req.Status); err != nil { 223 n.logger.Printf("[ERR] nomad.fsm: UpdateNodeStatus failed: %v", err) 224 return err 225 } 226 227 // Unblock evals for the nodes computed node class if it is in a ready 228 // state. 229 if req.Status == structs.NodeStatusReady { 230 ws := memdb.NewWatchSet() 231 node, err := n.state.NodeByID(ws, req.NodeID) 232 if err != nil { 233 n.logger.Printf("[ERR] nomad.fsm: looking up node %q failed: %v", req.NodeID, err) 234 return err 235 236 } 237 n.blockedEvals.Unblock(node.ComputedClass, index) 238 } 239 240 return nil 241 } 242 243 func (n *nomadFSM) applyDrainUpdate(buf []byte, index uint64) interface{} { 244 defer metrics.MeasureSince([]string{"nomad", "fsm", "node_drain_update"}, time.Now()) 245 var req structs.NodeUpdateDrainRequest 246 if err := structs.Decode(buf, &req); err != nil { 247 panic(fmt.Errorf("failed to decode request: %v", err)) 248 } 249 250 if err := n.state.UpdateNodeDrain(index, req.NodeID, req.Drain); err != nil { 251 n.logger.Printf("[ERR] nomad.fsm: UpdateNodeDrain failed: %v", err) 252 return err 253 } 254 return nil 255 } 256 257 func (n *nomadFSM) applyUpsertJob(buf []byte, index uint64) interface{} { 258 defer metrics.MeasureSince([]string{"nomad", "fsm", "register_job"}, time.Now()) 259 var req structs.JobRegisterRequest 260 if err := structs.Decode(buf, &req); err != nil { 261 panic(fmt.Errorf("failed to decode request: %v", err)) 262 } 263 264 /* Handle upgrade paths: 265 * - Empty maps and slices should be treated as nil to avoid 266 * un-intended destructive updates in scheduler since we use 267 * reflect.DeepEqual. Starting Nomad 0.4.1, job submission sanatizes 268 * the incoming job. 269 * - Migrate from old style upgrade stanza that used only a stagger. 270 */ 271 req.Job.Canonicalize() 272 273 if err := n.state.UpsertJob(index, req.Job); err != nil { 274 n.logger.Printf("[ERR] nomad.fsm: UpsertJob failed: %v", err) 275 return err 276 } 277 278 // We always add the job to the periodic dispatcher because there is the 279 // possibility that the periodic spec was removed and then we should stop 280 // tracking it. 281 if err := n.periodicDispatcher.Add(req.Job); err != nil { 282 n.logger.Printf("[ERR] nomad.fsm: periodicDispatcher.Add failed: %v", err) 283 return err 284 } 285 286 // Create a watch set 287 ws := memdb.NewWatchSet() 288 289 // If it is periodic, record the time it was inserted. This is necessary for 290 // recovering during leader election. It is possible that from the time it 291 // is added to when it was suppose to launch, leader election occurs and the 292 // job was not launched. In this case, we use the insertion time to 293 // determine if a launch was missed. 294 if req.Job.IsPeriodic() { 295 prevLaunch, err := n.state.PeriodicLaunchByID(ws, req.Job.ID) 296 if err != nil { 297 n.logger.Printf("[ERR] nomad.fsm: PeriodicLaunchByID failed: %v", err) 298 return err 299 } 300 301 // Record the insertion time as a launch. We overload the launch table 302 // such that the first entry is the insertion time. 303 if prevLaunch == nil { 304 launch := &structs.PeriodicLaunch{ID: req.Job.ID, Launch: time.Now()} 305 if err := n.state.UpsertPeriodicLaunch(index, launch); err != nil { 306 n.logger.Printf("[ERR] nomad.fsm: UpsertPeriodicLaunch failed: %v", err) 307 return err 308 } 309 } 310 } 311 312 // Check if the parent job is periodic and mark the launch time. 313 parentID := req.Job.ParentID 314 if parentID != "" { 315 parent, err := n.state.JobByID(ws, parentID) 316 if err != nil { 317 n.logger.Printf("[ERR] nomad.fsm: JobByID(%v) lookup for parent failed: %v", parentID, err) 318 return err 319 } else if parent == nil { 320 // The parent has been deregistered. 321 return nil 322 } 323 324 if parent.IsPeriodic() && !parent.IsParameterized() { 325 t, err := n.periodicDispatcher.LaunchTime(req.Job.ID) 326 if err != nil { 327 n.logger.Printf("[ERR] nomad.fsm: LaunchTime(%v) failed: %v", req.Job.ID, err) 328 return err 329 } 330 331 launch := &structs.PeriodicLaunch{ID: parentID, Launch: t} 332 if err := n.state.UpsertPeriodicLaunch(index, launch); err != nil { 333 n.logger.Printf("[ERR] nomad.fsm: UpsertPeriodicLaunch failed: %v", err) 334 return err 335 } 336 } 337 } 338 339 return nil 340 } 341 342 func (n *nomadFSM) applyDeregisterJob(buf []byte, index uint64) interface{} { 343 defer metrics.MeasureSince([]string{"nomad", "fsm", "deregister_job"}, time.Now()) 344 var req structs.JobDeregisterRequest 345 if err := structs.Decode(buf, &req); err != nil { 346 panic(fmt.Errorf("failed to decode request: %v", err)) 347 } 348 349 // If it is periodic remove it from the dispatcher 350 if err := n.periodicDispatcher.Remove(req.JobID); err != nil { 351 n.logger.Printf("[ERR] nomad.fsm: periodicDispatcher.Remove failed: %v", err) 352 return err 353 } 354 355 if req.Purge { 356 if err := n.state.DeleteJob(index, req.JobID); err != nil { 357 n.logger.Printf("[ERR] nomad.fsm: DeleteJob failed: %v", err) 358 return err 359 } 360 361 // We always delete from the periodic launch table because it is possible that 362 // the job was updated to be non-perioidic, thus checking if it is periodic 363 // doesn't ensure we clean it up properly. 364 n.state.DeletePeriodicLaunch(index, req.JobID) 365 } else { 366 // Get the current job and mark it as stopped and re-insert it. 367 ws := memdb.NewWatchSet() 368 current, err := n.state.JobByID(ws, req.JobID) 369 if err != nil { 370 n.logger.Printf("[ERR] nomad.fsm: JobByID lookup failed: %v", err) 371 return err 372 } 373 374 if current == nil { 375 return fmt.Errorf("job %q doesn't exist to be deregistered", req.JobID) 376 } 377 378 stopped := current.Copy() 379 stopped.Stop = true 380 381 if err := n.state.UpsertJob(index, stopped); err != nil { 382 n.logger.Printf("[ERR] nomad.fsm: UpsertJob failed: %v", err) 383 return err 384 } 385 } 386 387 return nil 388 } 389 390 func (n *nomadFSM) applyUpdateEval(buf []byte, index uint64) interface{} { 391 defer metrics.MeasureSince([]string{"nomad", "fsm", "update_eval"}, time.Now()) 392 var req structs.EvalUpdateRequest 393 if err := structs.Decode(buf, &req); err != nil { 394 panic(fmt.Errorf("failed to decode request: %v", err)) 395 } 396 397 if err := n.state.UpsertEvals(index, req.Evals); err != nil { 398 n.logger.Printf("[ERR] nomad.fsm: UpsertEvals failed: %v", err) 399 return err 400 } 401 402 for _, eval := range req.Evals { 403 if eval.ShouldEnqueue() { 404 n.evalBroker.Enqueue(eval) 405 } else if eval.ShouldBlock() { 406 n.blockedEvals.Block(eval) 407 } else if eval.Status == structs.EvalStatusComplete && 408 len(eval.FailedTGAllocs) == 0 { 409 // If we have a successful evaluation for a node, untrack any 410 // blocked evaluation 411 n.blockedEvals.Untrack(eval.JobID) 412 } 413 } 414 return nil 415 } 416 417 func (n *nomadFSM) applyDeleteEval(buf []byte, index uint64) interface{} { 418 defer metrics.MeasureSince([]string{"nomad", "fsm", "delete_eval"}, time.Now()) 419 var req structs.EvalDeleteRequest 420 if err := structs.Decode(buf, &req); err != nil { 421 panic(fmt.Errorf("failed to decode request: %v", err)) 422 } 423 424 if err := n.state.DeleteEval(index, req.Evals, req.Allocs); err != nil { 425 n.logger.Printf("[ERR] nomad.fsm: DeleteEval failed: %v", err) 426 return err 427 } 428 return nil 429 } 430 431 func (n *nomadFSM) applyAllocUpdate(buf []byte, index uint64) interface{} { 432 defer metrics.MeasureSince([]string{"nomad", "fsm", "alloc_update"}, time.Now()) 433 var req structs.AllocUpdateRequest 434 if err := structs.Decode(buf, &req); err != nil { 435 panic(fmt.Errorf("failed to decode request: %v", err)) 436 } 437 438 // Attach the job to all the allocations. It is pulled out in the 439 // payload to avoid the redundancy of encoding, but should be denormalized 440 // prior to being inserted into MemDB. 441 structs.DenormalizeAllocationJobs(req.Job, req.Alloc) 442 443 // Calculate the total resources of allocations. It is pulled out in the 444 // payload to avoid encoding something that can be computed, but should be 445 // denormalized prior to being inserted into MemDB. 446 for _, alloc := range req.Alloc { 447 if alloc.Resources != nil { 448 // COMPAT 0.4.1 -> 0.5 449 // Set the shared resources for allocations which don't have them 450 if alloc.SharedResources == nil { 451 alloc.SharedResources = &structs.Resources{ 452 DiskMB: alloc.Resources.DiskMB, 453 } 454 } 455 456 continue 457 } 458 459 alloc.Resources = new(structs.Resources) 460 for _, task := range alloc.TaskResources { 461 alloc.Resources.Add(task) 462 } 463 464 // Add the shared resources 465 alloc.Resources.Add(alloc.SharedResources) 466 } 467 468 if err := n.state.UpsertAllocs(index, req.Alloc); err != nil { 469 n.logger.Printf("[ERR] nomad.fsm: UpsertAllocs failed: %v", err) 470 return err 471 } 472 return nil 473 } 474 475 func (n *nomadFSM) applyAllocClientUpdate(buf []byte, index uint64) interface{} { 476 defer metrics.MeasureSince([]string{"nomad", "fsm", "alloc_client_update"}, time.Now()) 477 var req structs.AllocUpdateRequest 478 if err := structs.Decode(buf, &req); err != nil { 479 panic(fmt.Errorf("failed to decode request: %v", err)) 480 } 481 if len(req.Alloc) == 0 { 482 return nil 483 } 484 485 // Create a watch set 486 ws := memdb.NewWatchSet() 487 488 // Updating the allocs with the job id and task group name 489 for _, alloc := range req.Alloc { 490 if existing, _ := n.state.AllocByID(ws, alloc.ID); existing != nil { 491 alloc.JobID = existing.JobID 492 alloc.TaskGroup = existing.TaskGroup 493 } 494 } 495 496 // Update all the client allocations 497 if err := n.state.UpdateAllocsFromClient(index, req.Alloc); err != nil { 498 n.logger.Printf("[ERR] nomad.fsm: UpdateAllocFromClient failed: %v", err) 499 return err 500 } 501 502 // Unblock evals for the nodes computed node class if the client has 503 // finished running an allocation. 504 for _, alloc := range req.Alloc { 505 if alloc.ClientStatus == structs.AllocClientStatusComplete || 506 alloc.ClientStatus == structs.AllocClientStatusFailed { 507 nodeID := alloc.NodeID 508 node, err := n.state.NodeByID(ws, nodeID) 509 if err != nil || node == nil { 510 n.logger.Printf("[ERR] nomad.fsm: looking up node %q failed: %v", nodeID, err) 511 return err 512 513 } 514 n.blockedEvals.Unblock(node.ComputedClass, index) 515 } 516 } 517 518 return nil 519 } 520 521 // applyReconcileSummaries reconciles summaries for all the jobs 522 func (n *nomadFSM) applyReconcileSummaries(buf []byte, index uint64) interface{} { 523 if err := n.state.ReconcileJobSummaries(index); err != nil { 524 return err 525 } 526 return n.reconcileQueuedAllocations(index) 527 } 528 529 // applyUpsertVaultAccessor stores the Vault accessors for a given allocation 530 // and task 531 func (n *nomadFSM) applyUpsertVaultAccessor(buf []byte, index uint64) interface{} { 532 defer metrics.MeasureSince([]string{"nomad", "fsm", "upsert_vault_accessor"}, time.Now()) 533 var req structs.VaultAccessorsRequest 534 if err := structs.Decode(buf, &req); err != nil { 535 panic(fmt.Errorf("failed to decode request: %v", err)) 536 } 537 538 if err := n.state.UpsertVaultAccessor(index, req.Accessors); err != nil { 539 n.logger.Printf("[ERR] nomad.fsm: UpsertVaultAccessor failed: %v", err) 540 return err 541 } 542 543 return nil 544 } 545 546 // applyDeregisterVaultAccessor deregisters a set of Vault accessors 547 func (n *nomadFSM) applyDeregisterVaultAccessor(buf []byte, index uint64) interface{} { 548 defer metrics.MeasureSince([]string{"nomad", "fsm", "deregister_vault_accessor"}, time.Now()) 549 var req structs.VaultAccessorsRequest 550 if err := structs.Decode(buf, &req); err != nil { 551 panic(fmt.Errorf("failed to decode request: %v", err)) 552 } 553 554 if err := n.state.DeleteVaultAccessors(index, req.Accessors); err != nil { 555 n.logger.Printf("[ERR] nomad.fsm: DeregisterVaultAccessor failed: %v", err) 556 return err 557 } 558 559 return nil 560 } 561 562 // applyPlanApply applies the results of a plan application 563 func (n *nomadFSM) applyPlanResults(buf []byte, index uint64) interface{} { 564 defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_plan_results"}, time.Now()) 565 var req structs.ApplyPlanResultsRequest 566 if err := structs.Decode(buf, &req); err != nil { 567 panic(fmt.Errorf("failed to decode request: %v", err)) 568 } 569 570 if err := n.state.UpsertPlanResults(index, &req); err != nil { 571 n.logger.Printf("[ERR] nomad.fsm: ApplyPlan failed: %v", err) 572 return err 573 } 574 575 return nil 576 } 577 578 // applyDeploymentStatusUpdate is used to update the status of an existing 579 // deployment 580 func (n *nomadFSM) applyDeploymentStatusUpdate(buf []byte, index uint64) interface{} { 581 defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_deployment_status_update"}, time.Now()) 582 var req structs.DeploymentStatusUpdateRequest 583 if err := structs.Decode(buf, &req); err != nil { 584 panic(fmt.Errorf("failed to decode request: %v", err)) 585 } 586 587 if err := n.state.UpdateDeploymentStatus(index, &req); err != nil { 588 n.logger.Printf("[ERR] nomad.fsm: UpsertDeploymentStatusUpdate failed: %v", err) 589 return err 590 } 591 592 if req.Eval != nil && req.Eval.ShouldEnqueue() { 593 n.evalBroker.Enqueue(req.Eval) 594 } 595 596 return nil 597 } 598 599 // applyDeploymentPromotion is used to promote canaries in a deployment 600 func (n *nomadFSM) applyDeploymentPromotion(buf []byte, index uint64) interface{} { 601 defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_deployment_promotion"}, time.Now()) 602 var req structs.ApplyDeploymentPromoteRequest 603 if err := structs.Decode(buf, &req); err != nil { 604 panic(fmt.Errorf("failed to decode request: %v", err)) 605 } 606 607 if err := n.state.UpdateDeploymentPromotion(index, &req); err != nil { 608 n.logger.Printf("[ERR] nomad.fsm: UpsertDeploymentPromotion failed: %v", err) 609 return err 610 } 611 612 if req.Eval != nil && req.Eval.ShouldEnqueue() { 613 n.evalBroker.Enqueue(req.Eval) 614 } 615 616 return nil 617 } 618 619 // applyDeploymentAllocHealth is used to set the health of allocations as part 620 // of a deployment 621 func (n *nomadFSM) applyDeploymentAllocHealth(buf []byte, index uint64) interface{} { 622 defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_deployment_alloc_health"}, time.Now()) 623 var req structs.ApplyDeploymentAllocHealthRequest 624 if err := structs.Decode(buf, &req); err != nil { 625 panic(fmt.Errorf("failed to decode request: %v", err)) 626 } 627 628 if err := n.state.UpdateDeploymentAllocHealth(index, &req); err != nil { 629 n.logger.Printf("[ERR] nomad.fsm: UpsertDeploymentAllocHealth failed: %v", err) 630 return err 631 } 632 633 if req.Eval != nil && req.Eval.ShouldEnqueue() { 634 n.evalBroker.Enqueue(req.Eval) 635 } 636 637 return nil 638 } 639 640 // applyDeploymentDelete is used to delete a set of deployments 641 func (n *nomadFSM) applyDeploymentDelete(buf []byte, index uint64) interface{} { 642 defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_deployment_delete"}, time.Now()) 643 var req structs.DeploymentDeleteRequest 644 if err := structs.Decode(buf, &req); err != nil { 645 panic(fmt.Errorf("failed to decode request: %v", err)) 646 } 647 648 if err := n.state.DeleteDeployment(index, req.Deployments); err != nil { 649 n.logger.Printf("[ERR] nomad.fsm: DeleteDeployment failed: %v", err) 650 return err 651 } 652 653 return nil 654 } 655 656 // applyJobStability is used to set the stability of a job 657 func (n *nomadFSM) applyJobStability(buf []byte, index uint64) interface{} { 658 defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_job_stability"}, time.Now()) 659 var req structs.JobStabilityRequest 660 if err := structs.Decode(buf, &req); err != nil { 661 panic(fmt.Errorf("failed to decode request: %v", err)) 662 } 663 664 if err := n.state.UpdateJobStability(index, req.JobID, req.JobVersion, req.Stable); err != nil { 665 n.logger.Printf("[ERR] nomad.fsm: UpdateJobStability failed: %v", err) 666 return err 667 } 668 669 return nil 670 } 671 672 func (n *nomadFSM) Snapshot() (raft.FSMSnapshot, error) { 673 // Create a new snapshot 674 snap, err := n.state.Snapshot() 675 if err != nil { 676 return nil, err 677 } 678 679 ns := &nomadSnapshot{ 680 snap: snap, 681 timetable: n.timetable, 682 } 683 return ns, nil 684 } 685 686 func (n *nomadFSM) Restore(old io.ReadCloser) error { 687 defer old.Close() 688 689 // Create a new state store 690 newState, err := state.NewStateStore(n.logOutput) 691 if err != nil { 692 return err 693 } 694 695 // Start the state restore 696 restore, err := newState.Restore() 697 if err != nil { 698 return err 699 } 700 defer restore.Abort() 701 702 // Create a decoder 703 dec := codec.NewDecoder(old, structs.MsgpackHandle) 704 705 // Read in the header 706 var header snapshotHeader 707 if err := dec.Decode(&header); err != nil { 708 return err 709 } 710 711 // Populate the new state 712 msgType := make([]byte, 1) 713 for { 714 // Read the message type 715 _, err := old.Read(msgType) 716 if err == io.EOF { 717 break 718 } else if err != nil { 719 return err 720 } 721 722 // Decode 723 switch SnapshotType(msgType[0]) { 724 case TimeTableSnapshot: 725 if err := n.timetable.Deserialize(dec); err != nil { 726 return fmt.Errorf("time table deserialize failed: %v", err) 727 } 728 729 case NodeSnapshot: 730 node := new(structs.Node) 731 if err := dec.Decode(node); err != nil { 732 return err 733 } 734 if err := restore.NodeRestore(node); err != nil { 735 return err 736 } 737 738 case JobSnapshot: 739 job := new(structs.Job) 740 if err := dec.Decode(job); err != nil { 741 return err 742 } 743 744 /* Handle upgrade paths: 745 * - Empty maps and slices should be treated as nil to avoid 746 * un-intended destructive updates in scheduler since we use 747 * reflect.DeepEqual. Starting Nomad 0.4.1, job submission sanatizes 748 * the incoming job. 749 * - Migrate from old style upgrade stanza that used only a stagger. 750 */ 751 job.Canonicalize() 752 753 if err := restore.JobRestore(job); err != nil { 754 return err 755 } 756 757 case EvalSnapshot: 758 eval := new(structs.Evaluation) 759 if err := dec.Decode(eval); err != nil { 760 return err 761 } 762 if err := restore.EvalRestore(eval); err != nil { 763 return err 764 } 765 766 case AllocSnapshot: 767 alloc := new(structs.Allocation) 768 if err := dec.Decode(alloc); err != nil { 769 return err 770 } 771 if err := restore.AllocRestore(alloc); err != nil { 772 return err 773 } 774 775 case IndexSnapshot: 776 idx := new(state.IndexEntry) 777 if err := dec.Decode(idx); err != nil { 778 return err 779 } 780 if err := restore.IndexRestore(idx); err != nil { 781 return err 782 } 783 784 case PeriodicLaunchSnapshot: 785 launch := new(structs.PeriodicLaunch) 786 if err := dec.Decode(launch); err != nil { 787 return err 788 } 789 if err := restore.PeriodicLaunchRestore(launch); err != nil { 790 return err 791 } 792 793 case JobSummarySnapshot: 794 summary := new(structs.JobSummary) 795 if err := dec.Decode(summary); err != nil { 796 return err 797 } 798 if err := restore.JobSummaryRestore(summary); err != nil { 799 return err 800 } 801 802 case VaultAccessorSnapshot: 803 accessor := new(structs.VaultAccessor) 804 if err := dec.Decode(accessor); err != nil { 805 return err 806 } 807 if err := restore.VaultAccessorRestore(accessor); err != nil { 808 return err 809 } 810 811 case JobVersionSnapshot: 812 version := new(structs.Job) 813 if err := dec.Decode(version); err != nil { 814 return err 815 } 816 if err := restore.JobVersionRestore(version); err != nil { 817 return err 818 } 819 820 case DeploymentSnapshot: 821 deployment := new(structs.Deployment) 822 if err := dec.Decode(deployment); err != nil { 823 return err 824 } 825 if err := restore.DeploymentRestore(deployment); err != nil { 826 return err 827 } 828 829 default: 830 return fmt.Errorf("Unrecognized snapshot type: %v", msgType) 831 } 832 } 833 834 restore.Commit() 835 836 // Create Job Summaries 837 // COMPAT 0.4 -> 0.4.1 838 // We can remove this in 0.5. This exists so that the server creates job 839 // summaries if they were not present previously. When users upgrade to 0.5 840 // from 0.4.1, the snapshot will contain job summaries so it will be safe to 841 // remove this block. 842 index, err := newState.Index("job_summary") 843 if err != nil { 844 return fmt.Errorf("couldn't fetch index of job summary table: %v", err) 845 } 846 847 // If the index is 0 that means there is no job summary in the snapshot so 848 // we will have to create them 849 if index == 0 { 850 // query the latest index 851 latestIndex, err := newState.LatestIndex() 852 if err != nil { 853 return fmt.Errorf("unable to query latest index: %v", index) 854 } 855 if err := newState.ReconcileJobSummaries(latestIndex); err != nil { 856 return fmt.Errorf("error reconciling summaries: %v", err) 857 } 858 } 859 860 // External code might be calling State(), so we need to synchronize 861 // here to make sure we swap in the new state store atomically. 862 n.stateLock.Lock() 863 stateOld := n.state 864 n.state = newState 865 n.stateLock.Unlock() 866 867 // Signal that the old state store has been abandoned. This is required 868 // because we don't operate on it any more, we just throw it away, so 869 // blocking queries won't see any changes and need to be woken up. 870 stateOld.Abandon() 871 872 return nil 873 } 874 875 // reconcileSummaries re-calculates the queued allocations for every job that we 876 // created a Job Summary during the snap shot restore 877 func (n *nomadFSM) reconcileQueuedAllocations(index uint64) error { 878 // Get all the jobs 879 ws := memdb.NewWatchSet() 880 iter, err := n.state.Jobs(ws) 881 if err != nil { 882 return err 883 } 884 885 snap, err := n.state.Snapshot() 886 if err != nil { 887 return fmt.Errorf("unable to create snapshot: %v", err) 888 } 889 890 // Invoking the scheduler for every job so that we can populate the number 891 // of queued allocations for every job 892 for { 893 rawJob := iter.Next() 894 if rawJob == nil { 895 break 896 } 897 job := rawJob.(*structs.Job) 898 planner := &scheduler.Harness{ 899 State: &snap.StateStore, 900 } 901 // Create an eval and mark it as requiring annotations and insert that as well 902 eval := &structs.Evaluation{ 903 ID: structs.GenerateUUID(), 904 Priority: job.Priority, 905 Type: job.Type, 906 TriggeredBy: structs.EvalTriggerJobRegister, 907 JobID: job.ID, 908 JobModifyIndex: job.JobModifyIndex + 1, 909 Status: structs.EvalStatusPending, 910 AnnotatePlan: true, 911 } 912 913 // Create the scheduler and run it 914 sched, err := scheduler.NewScheduler(eval.Type, n.logger, snap, planner) 915 if err != nil { 916 return err 917 } 918 919 if err := sched.Process(eval); err != nil { 920 return err 921 } 922 923 // Get the job summary from the fsm state store 924 originalSummary, err := n.state.JobSummaryByID(ws, job.ID) 925 if err != nil { 926 return err 927 } 928 summary := originalSummary.Copy() 929 930 // Add the allocations scheduler has made to queued since these 931 // allocations are never getting placed until the scheduler is invoked 932 // with a real planner 933 if l := len(planner.Plans); l != 1 { 934 return fmt.Errorf("unexpected number of plans during restore %d. Please file an issue including the logs", l) 935 } 936 for _, allocations := range planner.Plans[0].NodeAllocation { 937 for _, allocation := range allocations { 938 tgSummary, ok := summary.Summary[allocation.TaskGroup] 939 if !ok { 940 return fmt.Errorf("task group %q not found while updating queued count", allocation.TaskGroup) 941 } 942 tgSummary.Queued += 1 943 summary.Summary[allocation.TaskGroup] = tgSummary 944 } 945 } 946 947 // Add the queued allocations attached to the evaluation to the queued 948 // counter of the job summary 949 if l := len(planner.Evals); l != 1 { 950 return fmt.Errorf("unexpected number of evals during restore %d. Please file an issue including the logs", l) 951 } 952 for tg, queued := range planner.Evals[0].QueuedAllocations { 953 tgSummary, ok := summary.Summary[tg] 954 if !ok { 955 return fmt.Errorf("task group %q not found while updating queued count", tg) 956 } 957 958 // We add instead of setting here because we want to take into 959 // consideration what the scheduler with a mock planner thinks it 960 // placed. Those should be counted as queued as well 961 tgSummary.Queued += queued 962 summary.Summary[tg] = tgSummary 963 } 964 965 if !reflect.DeepEqual(summary, originalSummary) { 966 summary.ModifyIndex = index 967 if err := n.state.UpsertJobSummary(index, summary); err != nil { 968 return err 969 } 970 } 971 } 972 return nil 973 } 974 975 func (s *nomadSnapshot) Persist(sink raft.SnapshotSink) error { 976 defer metrics.MeasureSince([]string{"nomad", "fsm", "persist"}, time.Now()) 977 // Register the nodes 978 encoder := codec.NewEncoder(sink, structs.MsgpackHandle) 979 980 // Write the header 981 header := snapshotHeader{} 982 if err := encoder.Encode(&header); err != nil { 983 sink.Cancel() 984 return err 985 } 986 987 // Write the time table 988 sink.Write([]byte{byte(TimeTableSnapshot)}) 989 if err := s.timetable.Serialize(encoder); err != nil { 990 sink.Cancel() 991 return err 992 } 993 994 // Write all the data out 995 if err := s.persistIndexes(sink, encoder); err != nil { 996 sink.Cancel() 997 return err 998 } 999 if err := s.persistNodes(sink, encoder); err != nil { 1000 sink.Cancel() 1001 return err 1002 } 1003 if err := s.persistJobs(sink, encoder); err != nil { 1004 sink.Cancel() 1005 return err 1006 } 1007 if err := s.persistEvals(sink, encoder); err != nil { 1008 sink.Cancel() 1009 return err 1010 } 1011 if err := s.persistAllocs(sink, encoder); err != nil { 1012 sink.Cancel() 1013 return err 1014 } 1015 if err := s.persistPeriodicLaunches(sink, encoder); err != nil { 1016 sink.Cancel() 1017 return err 1018 } 1019 if err := s.persistJobSummaries(sink, encoder); err != nil { 1020 sink.Cancel() 1021 return err 1022 } 1023 if err := s.persistVaultAccessors(sink, encoder); err != nil { 1024 sink.Cancel() 1025 return err 1026 } 1027 if err := s.persistJobVersions(sink, encoder); err != nil { 1028 sink.Cancel() 1029 return err 1030 } 1031 if err := s.persistDeployments(sink, encoder); err != nil { 1032 sink.Cancel() 1033 return err 1034 } 1035 return nil 1036 } 1037 1038 func (s *nomadSnapshot) persistIndexes(sink raft.SnapshotSink, 1039 encoder *codec.Encoder) error { 1040 // Get all the indexes 1041 iter, err := s.snap.Indexes() 1042 if err != nil { 1043 return err 1044 } 1045 1046 for { 1047 // Get the next item 1048 raw := iter.Next() 1049 if raw == nil { 1050 break 1051 } 1052 1053 // Prepare the request struct 1054 idx := raw.(*state.IndexEntry) 1055 1056 // Write out a node registration 1057 sink.Write([]byte{byte(IndexSnapshot)}) 1058 if err := encoder.Encode(idx); err != nil { 1059 return err 1060 } 1061 } 1062 return nil 1063 } 1064 1065 func (s *nomadSnapshot) persistNodes(sink raft.SnapshotSink, 1066 encoder *codec.Encoder) error { 1067 // Get all the nodes 1068 ws := memdb.NewWatchSet() 1069 nodes, err := s.snap.Nodes(ws) 1070 if err != nil { 1071 return err 1072 } 1073 1074 for { 1075 // Get the next item 1076 raw := nodes.Next() 1077 if raw == nil { 1078 break 1079 } 1080 1081 // Prepare the request struct 1082 node := raw.(*structs.Node) 1083 1084 // Write out a node registration 1085 sink.Write([]byte{byte(NodeSnapshot)}) 1086 if err := encoder.Encode(node); err != nil { 1087 return err 1088 } 1089 } 1090 return nil 1091 } 1092 1093 func (s *nomadSnapshot) persistJobs(sink raft.SnapshotSink, 1094 encoder *codec.Encoder) error { 1095 // Get all the jobs 1096 ws := memdb.NewWatchSet() 1097 jobs, err := s.snap.Jobs(ws) 1098 if err != nil { 1099 return err 1100 } 1101 1102 for { 1103 // Get the next item 1104 raw := jobs.Next() 1105 if raw == nil { 1106 break 1107 } 1108 1109 // Prepare the request struct 1110 job := raw.(*structs.Job) 1111 1112 // Write out a job registration 1113 sink.Write([]byte{byte(JobSnapshot)}) 1114 if err := encoder.Encode(job); err != nil { 1115 return err 1116 } 1117 } 1118 return nil 1119 } 1120 1121 func (s *nomadSnapshot) persistEvals(sink raft.SnapshotSink, 1122 encoder *codec.Encoder) error { 1123 // Get all the evaluations 1124 ws := memdb.NewWatchSet() 1125 evals, err := s.snap.Evals(ws) 1126 if err != nil { 1127 return err 1128 } 1129 1130 for { 1131 // Get the next item 1132 raw := evals.Next() 1133 if raw == nil { 1134 break 1135 } 1136 1137 // Prepare the request struct 1138 eval := raw.(*structs.Evaluation) 1139 1140 // Write out the evaluation 1141 sink.Write([]byte{byte(EvalSnapshot)}) 1142 if err := encoder.Encode(eval); err != nil { 1143 return err 1144 } 1145 } 1146 return nil 1147 } 1148 1149 func (s *nomadSnapshot) persistAllocs(sink raft.SnapshotSink, 1150 encoder *codec.Encoder) error { 1151 // Get all the allocations 1152 ws := memdb.NewWatchSet() 1153 allocs, err := s.snap.Allocs(ws) 1154 if err != nil { 1155 return err 1156 } 1157 1158 for { 1159 // Get the next item 1160 raw := allocs.Next() 1161 if raw == nil { 1162 break 1163 } 1164 1165 // Prepare the request struct 1166 alloc := raw.(*structs.Allocation) 1167 1168 // Write out the evaluation 1169 sink.Write([]byte{byte(AllocSnapshot)}) 1170 if err := encoder.Encode(alloc); err != nil { 1171 return err 1172 } 1173 } 1174 return nil 1175 } 1176 1177 func (s *nomadSnapshot) persistPeriodicLaunches(sink raft.SnapshotSink, 1178 encoder *codec.Encoder) error { 1179 // Get all the jobs 1180 ws := memdb.NewWatchSet() 1181 launches, err := s.snap.PeriodicLaunches(ws) 1182 if err != nil { 1183 return err 1184 } 1185 1186 for { 1187 // Get the next item 1188 raw := launches.Next() 1189 if raw == nil { 1190 break 1191 } 1192 1193 // Prepare the request struct 1194 launch := raw.(*structs.PeriodicLaunch) 1195 1196 // Write out a job registration 1197 sink.Write([]byte{byte(PeriodicLaunchSnapshot)}) 1198 if err := encoder.Encode(launch); err != nil { 1199 return err 1200 } 1201 } 1202 return nil 1203 } 1204 1205 func (s *nomadSnapshot) persistJobSummaries(sink raft.SnapshotSink, 1206 encoder *codec.Encoder) error { 1207 1208 ws := memdb.NewWatchSet() 1209 summaries, err := s.snap.JobSummaries(ws) 1210 if err != nil { 1211 return err 1212 } 1213 1214 for { 1215 raw := summaries.Next() 1216 if raw == nil { 1217 break 1218 } 1219 1220 jobSummary := raw.(*structs.JobSummary) 1221 1222 sink.Write([]byte{byte(JobSummarySnapshot)}) 1223 if err := encoder.Encode(jobSummary); err != nil { 1224 return err 1225 } 1226 } 1227 return nil 1228 } 1229 1230 func (s *nomadSnapshot) persistVaultAccessors(sink raft.SnapshotSink, 1231 encoder *codec.Encoder) error { 1232 1233 ws := memdb.NewWatchSet() 1234 accessors, err := s.snap.VaultAccessors(ws) 1235 if err != nil { 1236 return err 1237 } 1238 1239 for { 1240 raw := accessors.Next() 1241 if raw == nil { 1242 break 1243 } 1244 1245 accessor := raw.(*structs.VaultAccessor) 1246 1247 sink.Write([]byte{byte(VaultAccessorSnapshot)}) 1248 if err := encoder.Encode(accessor); err != nil { 1249 return err 1250 } 1251 } 1252 return nil 1253 } 1254 1255 func (s *nomadSnapshot) persistJobVersions(sink raft.SnapshotSink, 1256 encoder *codec.Encoder) error { 1257 // Get all the jobs 1258 ws := memdb.NewWatchSet() 1259 versions, err := s.snap.JobVersions(ws) 1260 if err != nil { 1261 return err 1262 } 1263 1264 for { 1265 // Get the next item 1266 raw := versions.Next() 1267 if raw == nil { 1268 break 1269 } 1270 1271 // Prepare the request struct 1272 job := raw.(*structs.Job) 1273 1274 // Write out a job registration 1275 sink.Write([]byte{byte(JobVersionSnapshot)}) 1276 if err := encoder.Encode(job); err != nil { 1277 return err 1278 } 1279 } 1280 return nil 1281 } 1282 1283 func (s *nomadSnapshot) persistDeployments(sink raft.SnapshotSink, 1284 encoder *codec.Encoder) error { 1285 // Get all the jobs 1286 ws := memdb.NewWatchSet() 1287 deployments, err := s.snap.Deployments(ws) 1288 if err != nil { 1289 return err 1290 } 1291 1292 for { 1293 // Get the next item 1294 raw := deployments.Next() 1295 if raw == nil { 1296 break 1297 } 1298 1299 // Prepare the request struct 1300 deployment := raw.(*structs.Deployment) 1301 1302 // Write out a job registration 1303 sink.Write([]byte{byte(DeploymentSnapshot)}) 1304 if err := encoder.Encode(deployment); err != nil { 1305 return err 1306 } 1307 } 1308 return nil 1309 } 1310 1311 // Release is a no-op, as we just need to GC the pointer 1312 // to the state store snapshot. There is nothing to explicitly 1313 // cleanup. 1314 func (s *nomadSnapshot) Release() {}