github.com/rigado/snapd@v2.42.5-go-mod+incompatible/overlord/overlord.go (about)

     1  // -*- Mode: Go; indent-tabs-mode: t -*-
     2  
     3  /*
     4   * Copyright (C) 2016-2017 Canonical Ltd
     5   *
     6   * This program is free software: you can redistribute it and/or modify
     7   * it under the terms of the GNU General Public License version 3 as
     8   * published by the Free Software Foundation.
     9   *
    10   * This program is distributed in the hope that it will be useful,
    11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
    12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13   * GNU General Public License for more details.
    14   *
    15   * You should have received a copy of the GNU General Public License
    16   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17   *
    18   */
    19  
    20  // Package overlord implements the overall control of a snappy system.
    21  package overlord
    22  
    23  import (
    24  	"fmt"
    25  	"net/http"
    26  	"net/url"
    27  	"os"
    28  	"path/filepath"
    29  	"sync"
    30  	"sync/atomic"
    31  	"time"
    32  
    33  	"gopkg.in/tomb.v2"
    34  
    35  	"github.com/snapcore/snapd/dirs"
    36  	"github.com/snapcore/snapd/logger"
    37  	"github.com/snapcore/snapd/osutil"
    38  
    39  	"github.com/snapcore/snapd/overlord/assertstate"
    40  	"github.com/snapcore/snapd/overlord/cmdstate"
    41  	"github.com/snapcore/snapd/overlord/configstate"
    42  	"github.com/snapcore/snapd/overlord/configstate/proxyconf"
    43  	"github.com/snapcore/snapd/overlord/devicestate"
    44  	"github.com/snapcore/snapd/overlord/healthstate"
    45  	"github.com/snapcore/snapd/overlord/hookstate"
    46  	"github.com/snapcore/snapd/overlord/ifacestate"
    47  	"github.com/snapcore/snapd/overlord/patch"
    48  	"github.com/snapcore/snapd/overlord/snapshotstate"
    49  	"github.com/snapcore/snapd/overlord/snapstate"
    50  	"github.com/snapcore/snapd/overlord/state"
    51  	"github.com/snapcore/snapd/overlord/storecontext"
    52  	"github.com/snapcore/snapd/store"
    53  	"github.com/snapcore/snapd/timings"
    54  )
    55  
var (
	// ensureInterval is the duration the ensure timer gets armed with,
	// pruneInterval the period of the prune ticker; pruneWait and
	// abortWait are passed to State.Prune from the ensure loop.
	ensureInterval = 5 * time.Minute
	pruneInterval  = 10 * time.Minute
	pruneWait      = 24 * time.Hour * 1
	abortWait      = 24 * time.Hour * 7

	// pruneMaxChanges is passed to State.Prune as well.
	pruneMaxChanges = 500

	// defaultCachedDownloads is the value handed to the store's
	// SetCacheDownloads when a store is created.
	defaultCachedDownloads = 5

	// configstateInit is what New calls to initialize configstate;
	// presumably kept in a variable so that tests can replace it.
	configstateInit = configstate.Init
)
    68  
// Overlord is the central manager of a snappy system, keeping
// track of all available state managers and related helpers.
type Overlord struct {
	// stateEng is the engine all managers are registered with.
	stateEng *StateEngine
	// ensure loop: loopTomb tracks the goroutine started by Loop,
	// ensureLock is taken around updates of ensureTimer and ensureNext,
	// ensureRun is set atomically to 1 once an Ensure pass of the loop
	// completed and pruneTicker triggers the pruning of the state.
	loopTomb    *tomb.Tomb
	ensureLock  sync.Mutex
	ensureTimer *time.Timer
	ensureNext  time.Time
	ensureRun   int32
	pruneTicker *time.Ticker
	// restarts: restartBehavior can be nil, in which case restart
	// requests are only logged.
	restartBehavior RestartBehavior
	// managers: inited is true for an Overlord created with New and
	// false for one created with Mock, startedUp records that StartUp
	// already ran; the typed manager fields are filled in by addManager.
	inited    bool
	startedUp bool
	runner    *state.TaskRunner
	snapMgr   *snapstate.SnapManager
	assertMgr *assertstate.AssertManager
	ifaceMgr  *ifacestate.InterfaceManager
	hookMgr   *hookstate.HookManager
	deviceMgr *devicestate.DeviceManager
	cmdMgr    *cmdstate.CommandManager
	shotMgr   *snapshotstate.SnapshotManager
	// proxyConf mediates the http proxy config
	proxyConf func(req *http.Request) (*url.URL, error)
}
    96  
// RestartBehavior controls how to handle and carry forward restart requests
// via the state.
type RestartBehavior interface {
	// HandleRestart is called with the requested restart type when a
	// restart is requested through the overlord.
	HandleRestart(t state.RestartType)
	// RebootAsExpected is called early when either a reboot was
	// requested by snapd and happened or no reboot was expected at all.
	RebootAsExpected(st *state.State) error
	// RebootDidNotHappen is called early instead when a reboot was
	// requested by snapd but did not happen.
	RebootDidNotHappen(st *state.State) error
}
   108  
// storeNew is the constructor used to create stores; presumably kept
// in a variable so that tests can replace it.
var storeNew = store.New
   110  
// New creates a new Overlord with all its state managers.
// It can be provided with an optional RestartBehavior.
//
// The state is loaded from dirs.SnapStateFile (or created afresh if the
// file does not exist yet), then the managers are created and registered
// with the state engine, with the shared task runner registered last.
// Finally a store is created and set as the one used by snapstate.
// An error from loading the state or from any manager constructor is
// returned as is, together with a nil Overlord.
func New(restartBehavior RestartBehavior) (*Overlord, error) {
	// inited marks the Overlord as complete: AddManager refuses to
	// add further managers to it
	o := &Overlord{
		loopTomb:        new(tomb.Tomb),
		inited:          true,
		restartBehavior: restartBehavior,
	}

	// the backend routes ensure and restart requests coming from the
	// state back to this Overlord
	backend := &overlordStateBackend{
		path:           dirs.SnapStateFile,
		ensureBefore:   o.ensureBefore,
		requestRestart: o.requestRestart,
	}
	s, err := loadState(backend, restartBehavior)
	if err != nil {
		return nil, err
	}

	o.stateEng = NewStateEngine(s)
	o.runner = state.NewTaskRunner(s)

	// any unknown task should be ignored and succeed
	matchAnyUnknownTask := func(_ *state.Task) bool {
		return true
	}
	o.runner.AddOptionalHandler(matchAnyUnknownTask, handleUnknownTask, nil)

	// the hook manager goes first, it is needed to create some of the
	// managers below
	hookMgr, err := hookstate.Manager(s, o.runner)
	if err != nil {
		return nil, err
	}
	o.addManager(hookMgr)

	snapMgr, err := snapstate.Manager(s, o.runner)
	if err != nil {
		return nil, err
	}
	o.addManager(snapMgr)

	assertMgr, err := assertstate.Manager(s, o.runner)
	if err != nil {
		return nil, err
	}
	o.addManager(assertMgr)

	ifaceMgr, err := ifacestate.Manager(s, hookMgr, o.runner, nil, nil)
	if err != nil {
		return nil, err
	}
	o.addManager(ifaceMgr)

	// o.newStore lets the device manager create additional stores
	deviceMgr, err := devicestate.Manager(s, hookMgr, o.runner, o.newStore)
	if err != nil {
		return nil, err
	}
	o.addManager(deviceMgr)

	o.addManager(cmdstate.Manager(s, o.runner))
	o.addManager(snapshotstate.Manager(s, o.runner))

	if err := configstateInit(s, hookMgr); err != nil {
		return nil, err
	}
	healthstate.Init(hookMgr)

	// the shared task runner should be added last!
	o.stateEng.AddManager(o.runner)

	// the state stays locked until New returns
	s.Lock()
	defer s.Unlock()
	// setting up the store
	o.proxyConf = proxyconf.New(s).Conf
	storeCtx := storecontext.New(s, o.deviceMgr.StoreContextBackend())
	sto := o.newStoreWithContext(storeCtx)

	snapstate.ReplaceStore(s, sto)

	return o, nil
}
   191  
   192  func (o *Overlord) addManager(mgr StateManager) {
   193  	switch x := mgr.(type) {
   194  	case *hookstate.HookManager:
   195  		o.hookMgr = x
   196  	case *snapstate.SnapManager:
   197  		o.snapMgr = x
   198  	case *assertstate.AssertManager:
   199  		o.assertMgr = x
   200  	case *ifacestate.InterfaceManager:
   201  		o.ifaceMgr = x
   202  	case *devicestate.DeviceManager:
   203  		o.deviceMgr = x
   204  	case *cmdstate.CommandManager:
   205  		o.cmdMgr = x
   206  	case *snapshotstate.SnapshotManager:
   207  		o.shotMgr = x
   208  	}
   209  	o.stateEng.AddManager(mgr)
   210  }
   211  
   212  func loadState(backend state.Backend, restartBehavior RestartBehavior) (*state.State, error) {
   213  	curBootID, err := osutil.BootID()
   214  	if err != nil {
   215  		return nil, fmt.Errorf("fatal: cannot find current boot id: %v", err)
   216  	}
   217  
   218  	perfTimings := timings.New(map[string]string{"startup": "load-state"})
   219  
   220  	if !osutil.FileExists(dirs.SnapStateFile) {
   221  		// fail fast, mostly interesting for tests, this dir is setup
   222  		// by the snapd package
   223  		stateDir := filepath.Dir(dirs.SnapStateFile)
   224  		if !osutil.IsDirectory(stateDir) {
   225  			return nil, fmt.Errorf("fatal: directory %q must be present", stateDir)
   226  		}
   227  		s := state.New(backend)
   228  		s.Lock()
   229  		s.VerifyReboot(curBootID)
   230  		s.Unlock()
   231  		patch.Init(s)
   232  		return s, nil
   233  	}
   234  
   235  	r, err := os.Open(dirs.SnapStateFile)
   236  	if err != nil {
   237  		return nil, fmt.Errorf("cannot read the state file: %s", err)
   238  	}
   239  	defer r.Close()
   240  
   241  	var s *state.State
   242  	timings.Run(perfTimings, "read-state", "read snapd state from disk", func(tm timings.Measurer) {
   243  		s, err = state.ReadState(backend, r)
   244  	})
   245  	if err != nil {
   246  		return nil, err
   247  	}
   248  	s.Lock()
   249  	perfTimings.Save(s)
   250  	s.Unlock()
   251  
   252  	err = verifyReboot(s, curBootID, restartBehavior)
   253  	if err != nil {
   254  		return nil, err
   255  	}
   256  
   257  	// one-shot migrations
   258  	err = patch.Apply(s)
   259  	if err != nil {
   260  		return nil, err
   261  	}
   262  	return s, nil
   263  }
   264  
   265  func verifyReboot(s *state.State, curBootID string, restartBehavior RestartBehavior) error {
   266  	s.Lock()
   267  	defer s.Unlock()
   268  	err := s.VerifyReboot(curBootID)
   269  	if err != nil && err != state.ErrExpectedReboot {
   270  		return err
   271  	}
   272  	expectedRebootDidNotHappen := err == state.ErrExpectedReboot
   273  	if restartBehavior != nil {
   274  		if expectedRebootDidNotHappen {
   275  			return restartBehavior.RebootDidNotHappen(s)
   276  		}
   277  		return restartBehavior.RebootAsExpected(s)
   278  	}
   279  	if expectedRebootDidNotHappen {
   280  		logger.Noticef("expected system restart but it did not happen")
   281  	}
   282  	return nil
   283  }
   284  
   285  func (o *Overlord) newStoreWithContext(storeCtx store.DeviceAndAuthContext) snapstate.StoreService {
   286  	cfg := store.DefaultConfig()
   287  	cfg.Proxy = o.proxyConf
   288  	sto := storeNew(cfg, storeCtx)
   289  	sto.SetCacheDownloads(defaultCachedDownloads)
   290  	return sto
   291  }
   292  
   293  // newStore can make new stores for use during remodeling.
   294  // The device backend will tie them to the remodeling device state.
   295  func (o *Overlord) newStore(devBE storecontext.DeviceBackend) snapstate.StoreService {
   296  	scb := o.deviceMgr.StoreContextBackend()
   297  	stoCtx := storecontext.NewComposed(o.State(), devBE, scb, scb)
   298  	return o.newStoreWithContext(stoCtx)
   299  }
   300  
   301  // StartUp proceeds to run any expensive Overlord or managers initialization. After this is done once it is a noop.
   302  func (o *Overlord) StartUp() error {
   303  	if o.startedUp {
   304  		return nil
   305  	}
   306  	o.startedUp = true
   307  
   308  	// slow down for tests
   309  	if s := os.Getenv("SNAPD_SLOW_STARTUP"); s != "" {
   310  		if d, err := time.ParseDuration(s); err == nil {
   311  			logger.Noticef("slowing down startup by %v as requested", d)
   312  
   313  			time.Sleep(d)
   314  		}
   315  	}
   316  
   317  	return o.stateEng.StartUp()
   318  }
   319  
   320  // StartupTimeout computes a usable timeout for the startup
   321  // initializations by using a pessimistic estimate.
   322  func (o *Overlord) StartupTimeout() (timeout time.Duration, reasoning string, err error) {
   323  	// TODO: adjust based on real hardware measurements
   324  	st := o.State()
   325  	st.Lock()
   326  	defer st.Unlock()
   327  	n, err := snapstate.NumSnaps(st)
   328  	if err != nil {
   329  		return 0, "", err
   330  	}
   331  	// number of snaps (and connections) play a role
   332  	reasoning = "pessimistic estimate of 30s plus 5s per snap"
   333  	to := (30 * time.Second) + time.Duration(n)*(5*time.Second)
   334  	return to, reasoning, nil
   335  }
   336  
   337  func (o *Overlord) ensureTimerSetup() {
   338  	o.ensureLock.Lock()
   339  	defer o.ensureLock.Unlock()
   340  	o.ensureTimer = time.NewTimer(ensureInterval)
   341  	o.ensureNext = time.Now().Add(ensureInterval)
   342  	o.pruneTicker = time.NewTicker(pruneInterval)
   343  }
   344  
   345  func (o *Overlord) ensureTimerReset() time.Time {
   346  	o.ensureLock.Lock()
   347  	defer o.ensureLock.Unlock()
   348  	now := time.Now()
   349  	o.ensureTimer.Reset(ensureInterval)
   350  	o.ensureNext = now.Add(ensureInterval)
   351  	return o.ensureNext
   352  }
   353  
// ensureBefore asks for the next Ensure to happen within d from now,
// moving the ensure timer forward if needed. It panics if the ensure
// timer has not been created yet.
func (o *Overlord) ensureBefore(d time.Duration) {
	o.ensureLock.Lock()
	defer o.ensureLock.Unlock()
	if o.ensureTimer == nil {
		panic("cannot use EnsureBefore before Overlord.Loop")
	}
	now := time.Now()
	next := now.Add(d)
	// the request is for earlier than what is currently scheduled:
	// reschedule
	if next.Before(o.ensureNext) {
		o.ensureTimer.Reset(d)
		o.ensureNext = next
		return
	}

	// what was scheduled is already in the past
	if o.ensureNext.Before(now) {
		// timer already expired, it will be reset in Loop() and
		// next Ensure() will be called shortly.
		if !o.ensureTimer.Stop() {
			return
		}
		// the timer was still pending and is now stopped: have it
		// expire right away
		o.ensureTimer.Reset(0)
		o.ensureNext = now
	}
}
   378  
   379  func (o *Overlord) requestRestart(t state.RestartType) {
   380  	if o.restartBehavior == nil {
   381  		logger.Noticef("restart requested but no behavior set")
   382  	} else {
   383  		o.restartBehavior.HandleRestart(t)
   384  	}
   385  }
   386  
   387  // Loop runs a loop in a goroutine to ensure the current state regularly through StateEngine Ensure.
   388  func (o *Overlord) Loop() {
   389  	o.ensureTimerSetup()
   390  	o.loopTomb.Go(func() error {
   391  		for {
   392  			// TODO: pass a proper context into Ensure
   393  			o.ensureTimerReset()
   394  			// in case of errors engine logs them,
   395  			// continue to the next Ensure() try for now
   396  			o.stateEng.Ensure()
   397  			o.ensureDidRun()
   398  			select {
   399  			case <-o.loopTomb.Dying():
   400  				return nil
   401  			case <-o.ensureTimer.C:
   402  			case <-o.pruneTicker.C:
   403  				st := o.State()
   404  				st.Lock()
   405  				st.Prune(pruneWait, abortWait, pruneMaxChanges)
   406  				st.Unlock()
   407  			}
   408  		}
   409  	})
   410  }
   411  
   412  func (o *Overlord) ensureDidRun() {
   413  	atomic.StoreInt32(&o.ensureRun, 1)
   414  }
   415  
   416  func (o *Overlord) CanStandby() bool {
   417  	run := atomic.LoadInt32(&o.ensureRun)
   418  	return run != 0
   419  }
   420  
   421  // Stop stops the ensure loop and the managers under the StateEngine.
   422  func (o *Overlord) Stop() error {
   423  	o.loopTomb.Kill(nil)
   424  	err := o.loopTomb.Wait()
   425  	o.stateEng.Stop()
   426  	return err
   427  }
   428  
// settle implements Settle and SettleObserveBeforeCleanups: it calls
// StartUp and then keeps running Ensure followed by a wait on the
// state engine until a pass ends without the next Ensure having been
// rescheduled and without any ready change left that is not clean.
// beforeCleanups, if not nil, is called once, the first time a pass
// ends without a rescheduled Ensure. With a timeout greater than zero
// an error is returned if the timeout has elapsed at the start of a
// pass. Errors from Ensure are collected and returned together as an
// *ensureError. It panics if the ensure timer is already set.
func (o *Overlord) settle(timeout time.Duration, beforeCleanups func()) error {
	if err := o.StartUp(); err != nil {
		return err
	}

	// install a timer for the duration of the call, so that
	// ensureBefore and ensureTimerReset can be used
	func() {
		o.ensureLock.Lock()
		defer o.ensureLock.Unlock()
		if o.ensureTimer != nil {
			panic("cannot use Settle concurrently with other Settle or Loop calls")
		}
		o.ensureTimer = time.NewTimer(0)
	}()

	// and remove it again on the way out
	defer func() {
		o.ensureLock.Lock()
		defer o.ensureLock.Unlock()
		o.ensureTimer.Stop()
		o.ensureTimer = nil
	}()

	t0 := time.Now()
	done := false
	var errs []error
	for !done {
		if timeout > 0 && time.Since(t0) > timeout {
			err := fmt.Errorf("Settle is not converging")
			if len(errs) != 0 {
				return &ensureError{append(errs, err)}
			}
			return err
		}
		next := o.ensureTimerReset()
		err := o.stateEng.Ensure()
		// flatten the errors of an *ensureError into errs
		switch ee := err.(type) {
		case nil:
		case *ensureError:
			errs = append(errs, ee.errs...)
		default:
			errs = append(errs, err)
		}
		o.stateEng.Wait()
		// if ensureNext changed in the meantime the next Ensure was
		// rescheduled (see ensureBefore): go around again
		o.ensureLock.Lock()
		done = o.ensureNext.Equal(next)
		o.ensureLock.Unlock()
		if done {
			if beforeCleanups != nil {
				beforeCleanups()
				// make sure the callback is invoked only once
				beforeCleanups = nil
			}
			// we should wait also for cleanup handlers
			st := o.State()
			st.Lock()
			for _, chg := range st.Changes() {
				if chg.IsReady() && !chg.IsClean() {
					done = false
					break
				}
			}
			st.Unlock()
		}
	}
	if len(errs) != 0 {
		return &ensureError{errs}
	}
	return nil
}
   496  
// Settle first runs a state engine Ensure and then waits for
// activities to settle. That's done by waiting for all managers'
// activities to settle while making sure no immediate further Ensure
// is scheduled. It then waits similarly for all ready changes to
// reach the clean state. Chiefly for tests. Cannot be used in
// conjunction with Loop. If timeout is non-zero and settling takes
// longer than timeout, returns an error. Calls StartUp as well.
func (o *Overlord) Settle(timeout time.Duration) error {
	return o.settle(timeout, nil)
}
   507  
// SettleObserveBeforeCleanups first runs a state engine Ensure and
// then waits for activities to settle. That's done by waiting for all
// managers' activities to settle while making sure no immediate
// further Ensure is scheduled. It then waits similarly for all ready
// changes to reach the clean state, but calls the provided callback
// once before doing that. Chiefly for tests. Cannot be used in
// conjunction with Loop. If timeout is non-zero and settling takes
// longer than timeout, returns an error. Calls StartUp as well.
func (o *Overlord) SettleObserveBeforeCleanups(timeout time.Duration, beforeCleanups func()) error {
	return o.settle(timeout, beforeCleanups)
}
   519  
// State returns the system state managed by the overlord, as held by
// its state engine.
func (o *Overlord) State() *state.State {
	return o.stateEng.State()
}
   524  
// StateEngine returns the state engine used by overlord.
func (o *Overlord) StateEngine() *StateEngine {
	return o.stateEng
}
   529  
// TaskRunner returns the shared task runner responsible for running
// tasks for all managers under the overlord. It is created together
// with the overlord, both by New and by Mock.
func (o *Overlord) TaskRunner() *state.TaskRunner {
	return o.runner
}
   535  
// SnapManager returns the snap manager responsible for snaps under
// the overlord. It is nil if no snap manager was added.
func (o *Overlord) SnapManager() *snapstate.SnapManager {
	return o.snapMgr
}
   541  
// AssertManager returns the assertion manager enforcing assertions
// under the overlord. It is nil if no assertion manager was added.
func (o *Overlord) AssertManager() *assertstate.AssertManager {
	return o.assertMgr
}
   547  
// InterfaceManager returns the interface manager maintaining
// interface connections under the overlord. It is nil if no interface
// manager was added.
func (o *Overlord) InterfaceManager() *ifacestate.InterfaceManager {
	return o.ifaceMgr
}
   553  
// HookManager returns the hook manager responsible for running hooks
// under the overlord. It is nil if no hook manager was added.
func (o *Overlord) HookManager() *hookstate.HookManager {
	return o.hookMgr
}
   559  
// DeviceManager returns the device manager responsible for the device
// identity and policies. It is nil if no device manager was added.
func (o *Overlord) DeviceManager() *devicestate.DeviceManager {
	return o.deviceMgr
}
   565  
// CommandManager returns the manager responsible for running odd
// jobs. It is nil if no command manager was added.
func (o *Overlord) CommandManager() *cmdstate.CommandManager {
	return o.cmdMgr
}
   571  
// SnapshotManager returns the manager responsible for snapshots.
// It is nil if no snapshot manager was added.
func (o *Overlord) SnapshotManager() *snapshotstate.SnapshotManager {
	return o.shotMgr
}
   576  
// Mock creates an Overlord without any managers and with a backend
// not using disk. Managers can be added with AddManager. For testing.
// It is the same as MockWithRestartHandler with a nil handler.
func Mock() *Overlord {
	return MockWithRestartHandler(nil)
}
   582  
   583  // MockWithRestartHandler creates an Overlord without any managers and
   584  // with a backend not using disk. It will use the given handler on
   585  // restart requests. Managers can be added with AddManager. For
   586  // testing.
   587  func MockWithRestartHandler(handleRestart func(state.RestartType)) *Overlord {
   588  	o := &Overlord{
   589  		loopTomb:        new(tomb.Tomb),
   590  		inited:          false,
   591  		restartBehavior: mockRestartBehavior(handleRestart),
   592  	}
   593  	s := state.New(mockBackend{o: o})
   594  	o.stateEng = NewStateEngine(s)
   595  	o.runner = state.NewTaskRunner(s)
   596  
   597  	return o
   598  }
   599  
   600  // AddManager adds a manager to the overlord created with Mock. For
   601  // testing.
   602  func (o *Overlord) AddManager(mgr StateManager) {
   603  	if o.inited {
   604  		panic("internal error: cannot add managers to a fully initialized Overlord")
   605  	}
   606  	o.addManager(mgr)
   607  }
   608  
// mockRestartBehavior turns a plain restart handler function, possibly
// nil, into the RestartBehavior used by MockWithRestartHandler.
type mockRestartBehavior func(state.RestartType)
   610  
   611  func (rb mockRestartBehavior) HandleRestart(t state.RestartType) {
   612  	if rb == nil {
   613  		return
   614  	}
   615  	rb(t)
   616  }
   617  
// RebootAsExpected always panics: it is not meant to be reached with
// an Overlord created through Mock.
func (rb mockRestartBehavior) RebootAsExpected(*state.State) error {
	panic("internal error: overlord.Mock should not invoke RebootAsExpected")
}
   621  
// RebootDidNotHappen always panics: it is not meant to be reached with
// an Overlord created through Mock.
func (rb mockRestartBehavior) RebootDidNotHappen(*state.State) error {
	panic("internal error: overlord.Mock should not invoke RebootDidNotHappen")
}
   625  
// mockBackend is the state backend of an Overlord created with
// MockWithRestartHandler; it does not write anything out.
type mockBackend struct {
	// o is the Overlord ensure and restart requests are forwarded to.
	o *Overlord
}
   629  
// Checkpoint ignores data and always returns nil.
func (mb mockBackend) Checkpoint(data []byte) error {
	return nil
}
   633  
   634  func (mb mockBackend) EnsureBefore(d time.Duration) {
   635  	mb.o.ensureLock.Lock()
   636  	timer := mb.o.ensureTimer
   637  	mb.o.ensureLock.Unlock()
   638  	if timer == nil {
   639  		return
   640  	}
   641  
   642  	mb.o.ensureBefore(d)
   643  }
   644  
// RequestRestart forwards the restart request to the Overlord.
func (mb mockBackend) RequestRestart(t state.RestartType) {
	mb.o.requestRestart(t)
}