github.com/david-imola/snapd@v0.0.0-20210611180407-2de8ddeece6d/overlord/state/change.go (about) 1 // -*- Mode: Go; indent-tabs-mode: t -*- 2 3 /* 4 * Copyright (C) 2016 Canonical Ltd 5 * 6 * This program is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 3 as 8 * published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 * 18 */ 19 20 package state 21 22 import ( 23 "bytes" 24 "encoding/json" 25 "fmt" 26 "strings" 27 "time" 28 ) 29 30 // Status is used for status values for changes and tasks. 31 type Status int 32 33 // Admitted status values for changes and tasks. 34 const ( 35 // DefaultStatus is the standard computed status for a change or task. 36 // For tasks it's always mapped to DoStatus, and for change its mapped 37 // to an aggregation of its tasks' statuses. See Change.Status for details. 38 DefaultStatus Status = 0 39 40 // HoldStatus means the task should not run, perhaps as a consequence of an error on another task. 41 HoldStatus Status = 1 42 43 // DoStatus means the change or task is ready to start. 44 DoStatus Status = 2 45 46 // DoingStatus means the change or task is running or an attempt was made to run it. 47 DoingStatus Status = 3 48 49 // DoneStatus means the change or task was accomplished successfully. 50 DoneStatus Status = 4 51 52 // AbortStatus means the task should stop doing its activities and then undo. 53 AbortStatus Status = 5 54 55 // UndoStatus means the change or task should be undone, probably due to an error elsewhere. 56 UndoStatus Status = 6 57 58 // UndoingStatus means the change or task is being undone or an attempt was made to undo it. 59 UndoingStatus Status = 7 60 61 // UndoneStatus means a task was first done and then undone after an error elsewhere. 62 // Changes go directly into the error status instead of being marked as undone. 63 UndoneStatus Status = 8 64 65 // ErrorStatus means the change or task has errored out while running or being undone. 66 ErrorStatus Status = 9 67 68 nStatuses = iota 69 ) 70 71 // Ready returns whether a task or change with this status needs further 72 // work or has completed its attempt to perform the current goal. 73 func (s Status) Ready() bool { 74 switch s { 75 case DoneStatus, UndoneStatus, HoldStatus, ErrorStatus: 76 return true 77 } 78 return false 79 } 80 81 func (s Status) String() string { 82 switch s { 83 case DefaultStatus: 84 return "Default" 85 case DoStatus: 86 return "Do" 87 case DoingStatus: 88 return "Doing" 89 case DoneStatus: 90 return "Done" 91 case AbortStatus: 92 return "Abort" 93 case UndoStatus: 94 return "Undo" 95 case UndoingStatus: 96 return "Undoing" 97 case UndoneStatus: 98 return "Undone" 99 case HoldStatus: 100 return "Hold" 101 case ErrorStatus: 102 return "Error" 103 } 104 panic(fmt.Sprintf("internal error: unknown task status code: %d", s)) 105 } 106 107 // Change represents a tracked modification to the system state. 108 // 109 // The Change provides both the justification for individual tasks 110 // to be performed and the grouping of them. 111 // 112 // As an example, if an administrator requests an interface connection, 113 // multiple hooks might be individually run to accomplish the task. The 114 // Change summary would reflect the request for an interface connection, 115 // while the individual Task values would track the running of 116 // the hooks themselves. 117 type Change struct { 118 state *State 119 id string 120 kind string 121 summary string 122 status Status 123 clean bool 124 data customData 125 taskIDs []string 126 lanes int 127 ready chan struct{} 128 129 spawnTime time.Time 130 readyTime time.Time 131 } 132 133 type byReadyTime []*Change 134 135 func (a byReadyTime) Len() int { return len(a) } 136 func (a byReadyTime) Swap(i, j int) { a[i], a[j] = a[j], a[i] } 137 func (a byReadyTime) Less(i, j int) bool { return a[i].readyTime.Before(a[j].readyTime) } 138 139 func newChange(state *State, id, kind, summary string) *Change { 140 return &Change{ 141 state: state, 142 id: id, 143 kind: kind, 144 summary: summary, 145 data: make(customData), 146 ready: make(chan struct{}), 147 148 spawnTime: timeNow(), 149 } 150 } 151 152 type marshalledChange struct { 153 ID string `json:"id"` 154 Kind string `json:"kind"` 155 Summary string `json:"summary"` 156 Status Status `json:"status"` 157 Clean bool `json:"clean,omitempty"` 158 Data map[string]*json.RawMessage `json:"data,omitempty"` 159 TaskIDs []string `json:"task-ids,omitempty"` 160 Lanes int `json:"lanes,omitempty"` 161 162 SpawnTime time.Time `json:"spawn-time"` 163 ReadyTime *time.Time `json:"ready-time,omitempty"` 164 } 165 166 // MarshalJSON makes Change a json.Marshaller 167 func (c *Change) MarshalJSON() ([]byte, error) { 168 c.state.reading() 169 var readyTime *time.Time 170 if !c.readyTime.IsZero() { 171 readyTime = &c.readyTime 172 } 173 return json.Marshal(marshalledChange{ 174 ID: c.id, 175 Kind: c.kind, 176 Summary: c.summary, 177 Status: c.status, 178 Clean: c.clean, 179 Data: c.data, 180 TaskIDs: c.taskIDs, 181 Lanes: c.lanes, 182 183 SpawnTime: c.spawnTime, 184 ReadyTime: readyTime, 185 }) 186 } 187 188 // UnmarshalJSON makes Change a json.Unmarshaller 189 func (c *Change) UnmarshalJSON(data []byte) error { 190 if c.state != nil { 191 c.state.writing() 192 } 193 var unmarshalled marshalledChange 194 err := json.Unmarshal(data, &unmarshalled) 195 if err != nil { 196 return err 197 } 198 c.id = unmarshalled.ID 199 c.kind = unmarshalled.Kind 200 c.summary = unmarshalled.Summary 201 c.status = unmarshalled.Status 202 c.clean = unmarshalled.Clean 203 custData := unmarshalled.Data 204 if custData == nil { 205 custData = make(customData) 206 } 207 c.data = custData 208 c.taskIDs = unmarshalled.TaskIDs 209 c.lanes = unmarshalled.Lanes 210 c.ready = make(chan struct{}) 211 c.spawnTime = unmarshalled.SpawnTime 212 if unmarshalled.ReadyTime != nil { 213 c.readyTime = *unmarshalled.ReadyTime 214 } 215 return nil 216 } 217 218 // finishUnmarshal is called after the state and tasks are accessible. 219 func (c *Change) finishUnmarshal() { 220 if c.Status().Ready() { 221 close(c.ready) 222 } 223 } 224 225 // ID returns the individual random key for the change. 226 func (c *Change) ID() string { 227 return c.id 228 } 229 230 // Kind returns the nature of the change for managers to know how to handle it. 231 func (c *Change) Kind() string { 232 return c.kind 233 } 234 235 // Summary returns a summary describing what the change is about. 236 func (c *Change) Summary() string { 237 return c.summary 238 } 239 240 // Set associates value with key for future consulting by managers. 241 // The provided value must properly marshal and unmarshal with encoding/json. 242 func (c *Change) Set(key string, value interface{}) { 243 c.state.writing() 244 c.data.set(key, value) 245 } 246 247 // Get unmarshals the stored value associated with the provided key 248 // into the value parameter. 249 func (c *Change) Get(key string, value interface{}) error { 250 c.state.reading() 251 return c.data.get(key, value) 252 } 253 254 var statusOrder = []Status{ 255 AbortStatus, 256 UndoingStatus, 257 UndoStatus, 258 DoingStatus, 259 DoStatus, 260 ErrorStatus, 261 UndoneStatus, 262 DoneStatus, 263 HoldStatus, 264 } 265 266 func init() { 267 if len(statusOrder) != nStatuses-1 { 268 panic("statusOrder has wrong number of elements") 269 } 270 } 271 272 // Status returns the current status of the change. 273 // If the status was not explicitly set the result is derived from the status 274 // of the individual tasks related to the change, according to the following 275 // decision sequence: 276 // 277 // - With at least one task in DoStatus, return DoStatus 278 // - With at least one task in ErrorStatus, return ErrorStatus 279 // - Otherwise, return DoneStatus 280 // 281 func (c *Change) Status() Status { 282 c.state.reading() 283 if c.status == DefaultStatus { 284 if len(c.taskIDs) == 0 { 285 return HoldStatus 286 } 287 statusStats := make([]int, nStatuses) 288 for _, tid := range c.taskIDs { 289 statusStats[c.state.tasks[tid].Status()]++ 290 } 291 for _, s := range statusOrder { 292 if statusStats[s] > 0 { 293 return s 294 } 295 } 296 panic(fmt.Sprintf("internal error: cannot process change status: %v", statusStats)) 297 } 298 return c.status 299 } 300 301 // SetStatus sets the change status, overriding the default behavior (see Status method). 302 func (c *Change) SetStatus(s Status) { 303 c.state.writing() 304 c.status = s 305 if s.Ready() { 306 c.markReady() 307 } 308 } 309 310 func (c *Change) markReady() { 311 select { 312 case <-c.ready: 313 default: 314 close(c.ready) 315 } 316 if c.readyTime.IsZero() { 317 c.readyTime = timeNow() 318 } 319 } 320 321 // Ready returns a channel that is closed the first time the change becomes ready. 322 func (c *Change) Ready() <-chan struct{} { 323 return c.ready 324 } 325 326 // taskStatusChanged is called by tasks when their status is changed, 327 // to give the opportunity for the change to close its ready channel. 328 func (c *Change) taskStatusChanged(t *Task, old, new Status) { 329 if old.Ready() == new.Ready() { 330 return 331 } 332 for _, tid := range c.taskIDs { 333 task := c.state.tasks[tid] 334 if task != t && !task.status.Ready() { 335 return 336 } 337 } 338 // Here is the exact moment when a change goes from unready to ready, 339 // and from ready to unready. For now handle only the first of those. 340 // For the latter the channel might be replaced in the future. 341 if c.IsReady() && !c.Status().Ready() { 342 panic(fmt.Errorf("change %s unexpectedly became unready (%s)", c.ID(), c.Status())) 343 } 344 c.markReady() 345 } 346 347 // IsClean returns whether all tasks in the change have been cleaned. See SetClean. 348 func (c *Change) IsClean() bool { 349 c.state.reading() 350 return c.clean 351 } 352 353 // IsReady returns whether the change is considered ready. 354 // 355 // The result is similar to calling Ready on the status returned by the Status 356 // method, but this function is more efficient as it doesn't need to recompute 357 // the aggregated state of tasks on every call. 358 // 359 // As an exception, IsReady returns false for a Change without any tasks that 360 // never had its status explicitly set and was never unmarshalled out of the 361 // persistent state, despite its initial status being Hold. This is how the 362 // system represents changes right after they are created. 363 func (c *Change) IsReady() bool { 364 select { 365 case <-c.ready: 366 return true 367 default: 368 } 369 return false 370 } 371 372 func (c *Change) taskCleanChanged() { 373 if !c.IsReady() { 374 panic("internal error: attempted to set a task clean while change not ready") 375 } 376 for _, tid := range c.taskIDs { 377 task := c.state.tasks[tid] 378 if !task.clean { 379 return 380 } 381 } 382 c.clean = true 383 } 384 385 // SpawnTime returns the time when the change was created. 386 func (c *Change) SpawnTime() time.Time { 387 c.state.reading() 388 return c.spawnTime 389 } 390 391 // ReadyTime returns the time when the change became ready. 392 func (c *Change) ReadyTime() time.Time { 393 c.state.reading() 394 return c.readyTime 395 } 396 397 // changeError holds a set of task errors. 398 type changeError struct { 399 errors []taskError 400 } 401 402 type taskError struct { 403 task string 404 error string 405 } 406 407 func (e *changeError) Error() string { 408 var buf bytes.Buffer 409 buf.WriteString("cannot perform the following tasks:\n") 410 for _, te := range e.errors { 411 fmt.Fprintf(&buf, "- %s (%s)\n", te.task, te.error) 412 } 413 return strings.TrimSuffix(buf.String(), "\n") 414 } 415 416 func stripErrorMsg(msg string) (string, bool) { 417 i := strings.Index(msg, " ") 418 if i >= 0 && strings.HasPrefix(msg[i:], " ERROR ") { 419 return msg[i+len(" ERROR "):], true 420 } 421 return "", false 422 } 423 424 // Err returns an error value based on errors that were logged for tasks registered 425 // in this change, or nil if the change is not in ErrorStatus. 426 func (c *Change) Err() error { 427 c.state.reading() 428 if c.Status() != ErrorStatus { 429 return nil 430 } 431 var errors []taskError 432 for _, tid := range c.taskIDs { 433 task := c.state.tasks[tid] 434 if task.Status() != ErrorStatus { 435 continue 436 } 437 for _, msg := range task.Log() { 438 if s, ok := stripErrorMsg(msg); ok { 439 errors = append(errors, taskError{task.Summary(), s}) 440 } 441 } 442 } 443 if len(errors) == 0 { 444 return fmt.Errorf("internal inconsistency: change %q in ErrorStatus with no task errors logged", c.Kind()) 445 } 446 return &changeError{errors} 447 } 448 449 // State returns the system State 450 func (c *Change) State() *State { 451 return c.state 452 } 453 454 // AddTask registers a task as required for the state change to 455 // be accomplished. 456 func (c *Change) AddTask(t *Task) { 457 c.state.writing() 458 if t.change != "" { 459 panic(fmt.Sprintf("internal error: cannot add one %q task to multiple changes", t.Kind())) 460 } 461 t.change = c.id 462 c.taskIDs = addOnce(c.taskIDs, t.ID()) 463 } 464 465 // AddAll registers all tasks in the set as required for the state 466 // change to be accomplished. 467 func (c *Change) AddAll(ts *TaskSet) { 468 c.state.writing() 469 for _, t := range ts.tasks { 470 c.AddTask(t) 471 } 472 } 473 474 // Tasks returns all the tasks this state change depends on. 475 func (c *Change) Tasks() []*Task { 476 c.state.reading() 477 return c.state.tasksIn(c.taskIDs) 478 } 479 480 // LaneTasks returns all tasks from given lanes the state change depends on. 481 func (c *Change) LaneTasks(lanes ...int) []*Task { 482 laneLookup := make(map[int]bool) 483 for _, l := range lanes { 484 laneLookup[l] = true 485 } 486 487 c.state.reading() 488 var tasks []*Task 489 for _, tid := range c.taskIDs { 490 t := c.state.tasks[tid] 491 if len(t.lanes) == 0 && laneLookup[0] { 492 tasks = append(tasks, t) 493 } 494 for _, l := range t.lanes { 495 if laneLookup[l] { 496 tasks = append(tasks, t) 497 break 498 } 499 } 500 } 501 return tasks 502 } 503 504 // Abort flags the change for cancellation, whether in progress or not. 505 // Cancellation will proceed at the next ensure pass. 506 func (c *Change) Abort() { 507 c.state.writing() 508 tasks := make([]*Task, len(c.taskIDs)) 509 for i, tid := range c.taskIDs { 510 tasks[i] = c.state.tasks[tid] 511 } 512 c.abortTasks(tasks, make(map[int]bool), make(map[string]bool)) 513 } 514 515 // AbortLanes aborts all tasks in the provided lanes and any tasks waiting on them, 516 // except for tasks that are also in a healthy lane (not aborted, and not waiting 517 // on aborted). 518 func (c *Change) AbortLanes(lanes []int) { 519 c.state.writing() 520 c.abortLanes(lanes, make(map[int]bool), make(map[string]bool)) 521 } 522 523 func (c *Change) abortLanes(lanes []int, abortedLanes map[int]bool, seenTasks map[string]bool) { 524 var hasLive = make(map[int]bool) 525 var hasDead = make(map[int]bool) 526 var laneTasks []*Task 527 NextChangeTask: 528 for _, tid := range c.taskIDs { 529 t := c.state.tasks[tid] 530 531 var live bool 532 switch t.Status() { 533 case DoStatus, DoingStatus, DoneStatus: 534 live = true 535 } 536 537 for _, tlane := range t.Lanes() { 538 for _, lane := range lanes { 539 if tlane == lane { 540 laneTasks = append(laneTasks, t) 541 continue NextChangeTask 542 } 543 } 544 545 // Track opinion about lanes not in the kill list. 546 // If the lane ends up being entirely live, we'll 547 // preserve this task alive too. 548 if live { 549 hasLive[tlane] = true 550 } else { 551 hasDead[tlane] = true 552 } 553 } 554 } 555 556 abortTasks := make([]*Task, 0, len(laneTasks)) 557 NextLaneTask: 558 for _, t := range laneTasks { 559 for _, tlane := range t.Lanes() { 560 if hasLive[tlane] && !hasDead[tlane] { 561 continue NextLaneTask 562 } 563 } 564 abortTasks = append(abortTasks, t) 565 } 566 567 for _, lane := range lanes { 568 abortedLanes[lane] = true 569 } 570 if len(abortTasks) > 0 { 571 c.abortTasks(abortTasks, abortedLanes, seenTasks) 572 } 573 } 574 575 func (c *Change) abortTasks(tasks []*Task, abortedLanes map[int]bool, seenTasks map[string]bool) { 576 var lanes []int 577 for i := 0; i < len(tasks); i++ { 578 t := tasks[i] 579 if seenTasks[t.id] { 580 continue 581 } 582 seenTasks[t.id] = true 583 switch t.Status() { 584 case DoStatus: 585 // Still pending so don't even start. 586 t.SetStatus(HoldStatus) 587 case DoingStatus: 588 // In progress so stop and undo it. 589 t.SetStatus(AbortStatus) 590 case DoneStatus: 591 // Already done so undo it. 592 t.SetStatus(UndoStatus) 593 } 594 595 for _, lane := range t.Lanes() { 596 if !abortedLanes[lane] { 597 lanes = append(lanes, t.Lanes()...) 598 } 599 } 600 601 for _, halted := range t.HaltTasks() { 602 if !seenTasks[halted.id] { 603 tasks = append(tasks, halted) 604 } 605 } 606 } 607 if len(lanes) > 0 { 608 c.abortLanes(lanes, abortedLanes, seenTasks) 609 } 610 }