github.com/david-imola/snapd@v0.0.0-20210611180407-2de8ddeece6d/overlord/state/change.go (about)

     1  // -*- Mode: Go; indent-tabs-mode: t -*-
     2  
     3  /*
     4   * Copyright (C) 2016 Canonical Ltd
     5   *
     6   * This program is free software: you can redistribute it and/or modify
     7   * it under the terms of the GNU General Public License version 3 as
     8   * published by the Free Software Foundation.
     9   *
    10   * This program is distributed in the hope that it will be useful,
    11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
    12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13   * GNU General Public License for more details.
    14   *
    15   * You should have received a copy of the GNU General Public License
    16   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17   *
    18   */
    19  
    20  package state
    21  
    22  import (
    23  	"bytes"
    24  	"encoding/json"
    25  	"fmt"
    26  	"strings"
    27  	"time"
    28  )
    29  
    30  // Status is used for status values for changes and tasks.
    31  type Status int
    32  
    33  // Admitted status values for changes and tasks.
    34  const (
    35  	// DefaultStatus is the standard computed status for a change or task.
    36  	// For tasks it's always mapped to DoStatus, and for change its mapped
    37  	// to an aggregation of its tasks' statuses. See Change.Status for details.
    38  	DefaultStatus Status = 0
    39  
    40  	// HoldStatus means the task should not run, perhaps as a consequence of an error on another task.
    41  	HoldStatus Status = 1
    42  
    43  	// DoStatus means the change or task is ready to start.
    44  	DoStatus Status = 2
    45  
    46  	// DoingStatus means the change or task is running or an attempt was made to run it.
    47  	DoingStatus Status = 3
    48  
    49  	// DoneStatus means the change or task was accomplished successfully.
    50  	DoneStatus Status = 4
    51  
    52  	// AbortStatus means the task should stop doing its activities and then undo.
    53  	AbortStatus Status = 5
    54  
    55  	// UndoStatus means the change or task should be undone, probably due to an error elsewhere.
    56  	UndoStatus Status = 6
    57  
    58  	// UndoingStatus means the change or task is being undone or an attempt was made to undo it.
    59  	UndoingStatus Status = 7
    60  
    61  	// UndoneStatus means a task was first done and then undone after an error elsewhere.
    62  	// Changes go directly into the error status instead of being marked as undone.
    63  	UndoneStatus Status = 8
    64  
    65  	// ErrorStatus means the change or task has errored out while running or being undone.
    66  	ErrorStatus Status = 9
    67  
    68  	nStatuses = iota
    69  )
    70  
    71  // Ready returns whether a task or change with this status needs further
    72  // work or has completed its attempt to perform the current goal.
    73  func (s Status) Ready() bool {
    74  	switch s {
    75  	case DoneStatus, UndoneStatus, HoldStatus, ErrorStatus:
    76  		return true
    77  	}
    78  	return false
    79  }
    80  
    81  func (s Status) String() string {
    82  	switch s {
    83  	case DefaultStatus:
    84  		return "Default"
    85  	case DoStatus:
    86  		return "Do"
    87  	case DoingStatus:
    88  		return "Doing"
    89  	case DoneStatus:
    90  		return "Done"
    91  	case AbortStatus:
    92  		return "Abort"
    93  	case UndoStatus:
    94  		return "Undo"
    95  	case UndoingStatus:
    96  		return "Undoing"
    97  	case UndoneStatus:
    98  		return "Undone"
    99  	case HoldStatus:
   100  		return "Hold"
   101  	case ErrorStatus:
   102  		return "Error"
   103  	}
   104  	panic(fmt.Sprintf("internal error: unknown task status code: %d", s))
   105  }
   106  
// Change represents a tracked modification to the system state.
//
// The Change provides both the justification for individual tasks
// to be performed and the grouping of them.
//
// As an example, if an administrator requests an interface connection,
// multiple hooks might be individually run to accomplish the task. The
// Change summary would reflect the request for an interface connection,
// while the individual Task values would track the running of
// the hooks themselves.
type Change struct {
	state   *State        // State the change belongs to; tasks are looked up through it
	id      string        // identifier returned by ID
	kind    string        // nature of the change, returned by Kind
	summary string        // description returned by Summary
	status  Status        // explicitly set status; DefaultStatus means Status derives it from the tasks
	clean   bool          // set once every task of the change is clean (see taskCleanChanged)
	data    customData    // key/value store behind Set and Get
	taskIDs []string      // IDs of the tasks registered via AddTask/AddAll
	lanes   int           // NOTE(review): persisted but not otherwise used in this file — confirm its meaning against its users
	ready   chan struct{} // closed the first time the change becomes ready

	spawnTime time.Time // creation time, set by newChange
	readyTime time.Time // time the change first became ready; zero until then
}
   132  
   133  type byReadyTime []*Change
   134  
   135  func (a byReadyTime) Len() int           { return len(a) }
   136  func (a byReadyTime) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
   137  func (a byReadyTime) Less(i, j int) bool { return a[i].readyTime.Before(a[j].readyTime) }
   138  
   139  func newChange(state *State, id, kind, summary string) *Change {
   140  	return &Change{
   141  		state:   state,
   142  		id:      id,
   143  		kind:    kind,
   144  		summary: summary,
   145  		data:    make(customData),
   146  		ready:   make(chan struct{}),
   147  
   148  		spawnTime: timeNow(),
   149  	}
   150  }
   151  
// marshalledChange is the JSON representation of a Change, used by
// Change.MarshalJSON and Change.UnmarshalJSON.
type marshalledChange struct {
	ID      string                      `json:"id"`
	Kind    string                      `json:"kind"`
	Summary string                      `json:"summary"`
	Status  Status                      `json:"status"`
	Clean   bool                        `json:"clean,omitempty"`
	Data    map[string]*json.RawMessage `json:"data,omitempty"`
	TaskIDs []string                    `json:"task-ids,omitempty"`
	Lanes   int                         `json:"lanes,omitempty"`

	SpawnTime time.Time  `json:"spawn-time"`
	ReadyTime *time.Time `json:"ready-time,omitempty"` // pointer so that a change that is not ready yet omits the field
}
   165  
   166  // MarshalJSON makes Change a json.Marshaller
   167  func (c *Change) MarshalJSON() ([]byte, error) {
   168  	c.state.reading()
   169  	var readyTime *time.Time
   170  	if !c.readyTime.IsZero() {
   171  		readyTime = &c.readyTime
   172  	}
   173  	return json.Marshal(marshalledChange{
   174  		ID:      c.id,
   175  		Kind:    c.kind,
   176  		Summary: c.summary,
   177  		Status:  c.status,
   178  		Clean:   c.clean,
   179  		Data:    c.data,
   180  		TaskIDs: c.taskIDs,
   181  		Lanes:   c.lanes,
   182  
   183  		SpawnTime: c.spawnTime,
   184  		ReadyTime: readyTime,
   185  	})
   186  }
   187  
   188  // UnmarshalJSON makes Change a json.Unmarshaller
   189  func (c *Change) UnmarshalJSON(data []byte) error {
   190  	if c.state != nil {
   191  		c.state.writing()
   192  	}
   193  	var unmarshalled marshalledChange
   194  	err := json.Unmarshal(data, &unmarshalled)
   195  	if err != nil {
   196  		return err
   197  	}
   198  	c.id = unmarshalled.ID
   199  	c.kind = unmarshalled.Kind
   200  	c.summary = unmarshalled.Summary
   201  	c.status = unmarshalled.Status
   202  	c.clean = unmarshalled.Clean
   203  	custData := unmarshalled.Data
   204  	if custData == nil {
   205  		custData = make(customData)
   206  	}
   207  	c.data = custData
   208  	c.taskIDs = unmarshalled.TaskIDs
   209  	c.lanes = unmarshalled.Lanes
   210  	c.ready = make(chan struct{})
   211  	c.spawnTime = unmarshalled.SpawnTime
   212  	if unmarshalled.ReadyTime != nil {
   213  		c.readyTime = *unmarshalled.ReadyTime
   214  	}
   215  	return nil
   216  }
   217  
// finishUnmarshal is called after the state and tasks are accessible.
//
// UnmarshalJSON always installs an open ready channel; this closes it when
// the change's status (which may be derived from its tasks) is a ready one.
func (c *Change) finishUnmarshal() {
	if c.Status().Ready() {
		close(c.ready)
	}
}
   224  
// ID returns the individual random key for the change.
// It is the id the change was created or unmarshalled with.
func (c *Change) ID() string {
	return c.id
}
   229  
// Kind returns the nature of the change, which lets managers know how to
// handle it.
func (c *Change) Kind() string {
	return c.kind
}
   234  
// Summary returns a short description of what the change is about.
func (c *Change) Summary() string {
	return c.summary
}
   239  
// Set associates value with key for future consulting by managers.
// The provided value must properly marshal and unmarshal with encoding/json.
// The value is stored in the change's data, which is persisted by MarshalJSON.
func (c *Change) Set(key string, value interface{}) {
	c.state.writing()
	c.data.set(key, value)
}
   246  
// Get unmarshals the stored value associated with the provided key
// into the value parameter. The error, if any, is whatever the underlying
// data store lookup reports.
func (c *Change) Get(key string, value interface{}) error {
	c.state.reading()
	return c.data.get(key, value)
}
   253  
// statusOrder lists the task statuses in the order of precedence used by
// Change.Status when deriving the status of a change from its tasks: the
// first status in this list that at least one task has wins.
// DefaultStatus is deliberately not part of the list.
var statusOrder = []Status{
	AbortStatus,
	UndoingStatus,
	UndoStatus,
	DoingStatus,
	DoStatus,
	ErrorStatus,
	UndoneStatus,
	DoneStatus,
	HoldStatus,
}
   265  
// init verifies that statusOrder covers every status except DefaultStatus,
// so that a newly added status cannot be forgotten in the precedence list.
func init() {
	// nStatuses counts DefaultStatus too, hence the -1.
	if len(statusOrder) != nStatuses-1 {
		panic("statusOrder has wrong number of elements")
	}
}
   271  
   272  // Status returns the current status of the change.
   273  // If the status was not explicitly set the result is derived from the status
   274  // of the individual tasks related to the change, according to the following
   275  // decision sequence:
   276  //
   277  //     - With at least one task in DoStatus, return DoStatus
   278  //     - With at least one task in ErrorStatus, return ErrorStatus
   279  //     - Otherwise, return DoneStatus
   280  //
   281  func (c *Change) Status() Status {
   282  	c.state.reading()
   283  	if c.status == DefaultStatus {
   284  		if len(c.taskIDs) == 0 {
   285  			return HoldStatus
   286  		}
   287  		statusStats := make([]int, nStatuses)
   288  		for _, tid := range c.taskIDs {
   289  			statusStats[c.state.tasks[tid].Status()]++
   290  		}
   291  		for _, s := range statusOrder {
   292  			if statusStats[s] > 0 {
   293  				return s
   294  			}
   295  		}
   296  		panic(fmt.Sprintf("internal error: cannot process change status: %v", statusStats))
   297  	}
   298  	return c.status
   299  }
   300  
// SetStatus sets the change status, overriding the default behavior (see Status method).
//
// When s is a ready status the change is also marked ready, which closes the
// channel returned by Ready and records the ready time if not yet set.
// Setting a non-ready status afterwards does not reopen that channel.
func (c *Change) SetStatus(s Status) {
	c.state.writing()
	c.status = s
	if s.Ready() {
		c.markReady()
	}
}
   309  
   310  func (c *Change) markReady() {
   311  	select {
   312  	case <-c.ready:
   313  	default:
   314  		close(c.ready)
   315  	}
   316  	if c.readyTime.IsZero() {
   317  		c.readyTime = timeNow()
   318  	}
   319  }
   320  
// Ready returns a channel that is closed the first time the change becomes ready.
// The channel never carries values; receiving from it blocks until then.
func (c *Change) Ready() <-chan struct{} {
	return c.ready
}
   325  
   326  // taskStatusChanged is called by tasks when their status is changed,
   327  // to give the opportunity for the change to close its ready channel.
   328  func (c *Change) taskStatusChanged(t *Task, old, new Status) {
   329  	if old.Ready() == new.Ready() {
   330  		return
   331  	}
   332  	for _, tid := range c.taskIDs {
   333  		task := c.state.tasks[tid]
   334  		if task != t && !task.status.Ready() {
   335  			return
   336  		}
   337  	}
   338  	// Here is the exact moment when a change goes from unready to ready,
   339  	// and from ready to unready. For now handle only the first of those.
   340  	// For the latter the channel might be replaced in the future.
   341  	if c.IsReady() && !c.Status().Ready() {
   342  		panic(fmt.Errorf("change %s unexpectedly became unready (%s)", c.ID(), c.Status()))
   343  	}
   344  	c.markReady()
   345  }
   346  
// IsClean returns whether all tasks in the change have been cleaned. See SetClean.
// The flag is maintained by taskCleanChanged and persisted with the change.
func (c *Change) IsClean() bool {
	c.state.reading()
	return c.clean
}
   352  
   353  // IsReady returns whether the change is considered ready.
   354  //
   355  // The result is similar to calling Ready on the status returned by the Status
   356  // method, but this function is more efficient as it doesn't need to recompute
   357  // the aggregated state of tasks on every call.
   358  //
   359  // As an exception, IsReady returns false for a Change without any tasks that
   360  // never had its status explicitly set and was never unmarshalled out of the
   361  // persistent state, despite its initial status being Hold. This is how the
   362  // system represents changes right after they are created.
   363  func (c *Change) IsReady() bool {
   364  	select {
   365  	case <-c.ready:
   366  		return true
   367  	default:
   368  	}
   369  	return false
   370  }
   371  
   372  func (c *Change) taskCleanChanged() {
   373  	if !c.IsReady() {
   374  		panic("internal error: attempted to set a task clean while change not ready")
   375  	}
   376  	for _, tid := range c.taskIDs {
   377  		task := c.state.tasks[tid]
   378  		if !task.clean {
   379  			return
   380  		}
   381  	}
   382  	c.clean = true
   383  }
   384  
// SpawnTime returns the time when the change was created.
// It is set by newChange and restored from the persisted state on unmarshal.
func (c *Change) SpawnTime() time.Time {
	c.state.reading()
	return c.spawnTime
}
   390  
// ReadyTime returns the time when the change became ready.
// The result is the zero time while the change has not been marked ready.
func (c *Change) ReadyTime() time.Time {
	c.state.reading()
	return c.readyTime
}
   396  
   397  // changeError holds a set of task errors.
   398  type changeError struct {
   399  	errors []taskError
   400  }
   401  
   402  type taskError struct {
   403  	task  string
   404  	error string
   405  }
   406  
   407  func (e *changeError) Error() string {
   408  	var buf bytes.Buffer
   409  	buf.WriteString("cannot perform the following tasks:\n")
   410  	for _, te := range e.errors {
   411  		fmt.Fprintf(&buf, "- %s (%s)\n", te.task, te.error)
   412  	}
   413  	return strings.TrimSuffix(buf.String(), "\n")
   414  }
   415  
   416  func stripErrorMsg(msg string) (string, bool) {
   417  	i := strings.Index(msg, " ")
   418  	if i >= 0 && strings.HasPrefix(msg[i:], " ERROR ") {
   419  		return msg[i+len(" ERROR "):], true
   420  	}
   421  	return "", false
   422  }
   423  
   424  // Err returns an error value based on errors that were logged for tasks registered
   425  // in this change, or nil if the change is not in ErrorStatus.
   426  func (c *Change) Err() error {
   427  	c.state.reading()
   428  	if c.Status() != ErrorStatus {
   429  		return nil
   430  	}
   431  	var errors []taskError
   432  	for _, tid := range c.taskIDs {
   433  		task := c.state.tasks[tid]
   434  		if task.Status() != ErrorStatus {
   435  			continue
   436  		}
   437  		for _, msg := range task.Log() {
   438  			if s, ok := stripErrorMsg(msg); ok {
   439  				errors = append(errors, taskError{task.Summary(), s})
   440  			}
   441  		}
   442  	}
   443  	if len(errors) == 0 {
   444  		return fmt.Errorf("internal inconsistency: change %q in ErrorStatus with no task errors logged", c.Kind())
   445  	}
   446  	return &changeError{errors}
   447  }
   448  
// State returns the system State this change is bound to.
func (c *Change) State() *State {
	return c.state
}
   453  
// AddTask registers a task as required for the state change to
// be accomplished.
//
// It panics if the task has already been added to a change, as a task may
// belong to one change only.
func (c *Change) AddTask(t *Task) {
	c.state.writing()
	if t.change != "" {
		panic(fmt.Sprintf("internal error: cannot add one %q task to multiple changes", t.Kind()))
	}
	// Link both ways: the task records its change, the change records the task ID.
	t.change = c.id
	c.taskIDs = addOnce(c.taskIDs, t.ID())
}
   464  
// AddAll registers all tasks in the set as required for the state
// change to be accomplished. Each task is added via AddTask, so the
// same panic on tasks that already belong to a change applies.
func (c *Change) AddAll(ts *TaskSet) {
	c.state.writing()
	for _, t := range ts.tasks {
		c.AddTask(t)
	}
}
   473  
// Tasks returns all the tasks this state change depends on, that is the
// tasks registered through AddTask and AddAll, as resolved by the state.
func (c *Change) Tasks() []*Task {
	c.state.reading()
	return c.state.tasksIn(c.taskIDs)
}
   479  
   480  // LaneTasks returns all tasks from given lanes the state change depends on.
   481  func (c *Change) LaneTasks(lanes ...int) []*Task {
   482  	laneLookup := make(map[int]bool)
   483  	for _, l := range lanes {
   484  		laneLookup[l] = true
   485  	}
   486  
   487  	c.state.reading()
   488  	var tasks []*Task
   489  	for _, tid := range c.taskIDs {
   490  		t := c.state.tasks[tid]
   491  		if len(t.lanes) == 0 && laneLookup[0] {
   492  			tasks = append(tasks, t)
   493  		}
   494  		for _, l := range t.lanes {
   495  			if laneLookup[l] {
   496  				tasks = append(tasks, t)
   497  				break
   498  			}
   499  		}
   500  	}
   501  	return tasks
   502  }
   503  
   504  // Abort flags the change for cancellation, whether in progress or not.
   505  // Cancellation will proceed at the next ensure pass.
   506  func (c *Change) Abort() {
   507  	c.state.writing()
   508  	tasks := make([]*Task, len(c.taskIDs))
   509  	for i, tid := range c.taskIDs {
   510  		tasks[i] = c.state.tasks[tid]
   511  	}
   512  	c.abortTasks(tasks, make(map[int]bool), make(map[string]bool))
   513  }
   514  
// AbortLanes aborts all tasks in the provided lanes and any tasks waiting on them,
// except for tasks that are also in a healthy lane (not aborted, and not waiting
// on aborted).
//
// It starts the abortLanes/abortTasks recursion with no lanes aborted and no
// tasks seen yet.
func (c *Change) AbortLanes(lanes []int) {
	c.state.writing()
	c.abortLanes(lanes, make(map[int]bool), make(map[string]bool))
}
   522  
// abortLanes aborts the tasks of the change that are in any of the given
// lanes, sparing those that are also in a lane that looks entirely live.
//
// abortedLanes accumulates the lanes aborted so far and seenTasks the tasks
// already processed; both are shared with abortTasks, which may call back
// into abortLanes with further lanes.
func (c *Change) abortLanes(lanes []int, abortedLanes map[int]bool, seenTasks map[string]bool) {
	// For lanes outside the kill list: whether a live, respectively a
	// non-live, task was observed in them.
	var hasLive = make(map[int]bool)
	var hasDead = make(map[int]bool)
	// Tasks that are in at least one of the lanes to abort.
	var laneTasks []*Task
NextChangeTask:
	for _, tid := range c.taskIDs {
		t := c.state.tasks[tid]

		// A task is live while it is waiting to run, running, or done.
		var live bool
		switch t.Status() {
		case DoStatus, DoingStatus, DoneStatus:
			live = true
		}

		for _, tlane := range t.Lanes() {
			for _, lane := range lanes {
				if tlane == lane {
					// The task is a candidate for aborting. Its remaining
					// lanes are not inspected, so it contributes no
					// live/dead opinion for them.
					laneTasks = append(laneTasks, t)
					continue NextChangeTask
				}
			}

			// Track opinion about lanes not in the kill list.
			// If the lane ends up being entirely live, we'll
			// preserve this task alive too.
			if live {
				hasLive[tlane] = true
			} else {
				hasDead[tlane] = true
			}
		}
	}

	abortTasks := make([]*Task, 0, len(laneTasks))
NextLaneTask:
	for _, t := range laneTasks {
		for _, tlane := range t.Lanes() {
			// Spare the task if it is also part of a lane in which only
			// live tasks were observed.
			if hasLive[tlane] && !hasDead[tlane] {
				continue NextLaneTask
			}
		}
		abortTasks = append(abortTasks, t)
	}

	// Record the lanes as aborted before recursing, so that abortTasks does
	// not ask for them to be aborted again.
	for _, lane := range lanes {
		abortedLanes[lane] = true
	}
	if len(abortTasks) > 0 {
		c.abortTasks(abortTasks, abortedLanes, seenTasks)
	}
}
   574  
   575  func (c *Change) abortTasks(tasks []*Task, abortedLanes map[int]bool, seenTasks map[string]bool) {
   576  	var lanes []int
   577  	for i := 0; i < len(tasks); i++ {
   578  		t := tasks[i]
   579  		if seenTasks[t.id] {
   580  			continue
   581  		}
   582  		seenTasks[t.id] = true
   583  		switch t.Status() {
   584  		case DoStatus:
   585  			// Still pending so don't even start.
   586  			t.SetStatus(HoldStatus)
   587  		case DoingStatus:
   588  			// In progress so stop and undo it.
   589  			t.SetStatus(AbortStatus)
   590  		case DoneStatus:
   591  			// Already done so undo it.
   592  			t.SetStatus(UndoStatus)
   593  		}
   594  
   595  		for _, lane := range t.Lanes() {
   596  			if !abortedLanes[lane] {
   597  				lanes = append(lanes, t.Lanes()...)
   598  			}
   599  		}
   600  
   601  		for _, halted := range t.HaltTasks() {
   602  			if !seenTasks[halted.id] {
   603  				tasks = append(tasks, halted)
   604  			}
   605  		}
   606  	}
   607  	if len(lanes) > 0 {
   608  		c.abortLanes(lanes, abortedLanes, seenTasks)
   609  	}
   610  }