github.com/freetocompute/snapd@v0.0.0-20210618182524-2fb355d72fd9/overlord/servicestate/quota_handlers.go (about)

     1  // -*- Mode: Go; indent-tabs-mode: t -*-
     2  
     3  /*
     4   * Copyright (C) 2021 Canonical Ltd
     5   *
     6   * This program is free software: you can redistribute it and/or modify
     7   * it under the terms of the GNU General Public License version 3 as
     8   * published by the Free Software Foundation.
     9   *
    10   * This program is distributed in the hope that it will be useful,
    11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
    12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13   * GNU General Public License for more details.
    14   *
    15   * You should have received a copy of the GNU General Public License
    16   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17   *
    18   */
    19  
    20  package servicestate
    21  
    22  import (
    23  	"fmt"
    24  	"sort"
    25  	"time"
    26  
    27  	tomb "gopkg.in/tomb.v2"
    28  
    29  	"github.com/snapcore/snapd/gadget/quantity"
    30  	"github.com/snapcore/snapd/logger"
    31  	"github.com/snapcore/snapd/overlord/snapstate"
    32  	"github.com/snapcore/snapd/overlord/state"
    33  	"github.com/snapcore/snapd/progress"
    34  	"github.com/snapcore/snapd/snap"
    35  	"github.com/snapcore/snapd/snap/quota"
    36  	"github.com/snapcore/snapd/snapdenv"
    37  	"github.com/snapcore/snapd/strutil"
    38  	"github.com/snapcore/snapd/systemd"
    39  	"github.com/snapcore/snapd/timings"
    40  	"github.com/snapcore/snapd/wrappers"
    41  )
    42  
// QuotaControlAction is the serialized representation of a quota group
// modification that lives in a task.
type QuotaControlAction struct {
	// QuotaName is the name of the quota group being controlled.
	QuotaName string `json:"quota-name"`

	// Action is the action being taken on the quota group. It can be either
	// "create", "update", or "remove".
	Action string `json:"action"`

	// AddSnaps is the set of snaps to add to the quota group, valid for either
	// the "update" or the "create" actions. Note that it is serialized under
	// the "snaps" key.
	AddSnaps []string `json:"snaps"`

	// MemoryLimit is the memory limit for the quota group being controlled.
	// For the "create" action it is the initial limit the group is created
	// with. For the "update" action a non-zero value is the new limit to set,
	// while zero leaves the current limit unchanged.
	// The field carries no json tag, so it is serialized under its Go field
	// name.
	MemoryLimit quantity.Size

	// ParentName is the name of the parent for the quota group if it is being
	// created. Eventually this could be used with the "update" action to
	// support moving quota groups from one parent to another, but that is
	// currently not supported.
	// The field carries no json tag, so it is serialized under its Go field
	// name.
	ParentName string
}
    69  
    70  func (m *ServiceManager) doQuotaControl(t *state.Task, _ *tomb.Tomb) error {
    71  	st := t.State()
    72  	st.Lock()
    73  	defer st.Unlock()
    74  
    75  	perfTimings := state.TimingsForTask(t)
    76  	defer perfTimings.Save(st)
    77  
    78  	meter := snapstate.NewTaskProgressAdapterUnlocked(t)
    79  
    80  	qcs := []QuotaControlAction{}
    81  	err := t.Get("quota-control-actions", &qcs)
    82  	if err != nil {
    83  		return fmt.Errorf("internal error: cannot get quota-control-action: %v", err)
    84  	}
    85  
    86  	// TODO: support more than one action
    87  	switch {
    88  	case len(qcs) > 1:
    89  		return fmt.Errorf("multiple quota group actions not supported yet")
    90  	case len(qcs) == 0:
    91  		return fmt.Errorf("internal error: no quota group actions for quota-control task")
    92  	}
    93  
    94  	qc := qcs[0]
    95  
    96  	allGrps, err := AllQuotas(st)
    97  	if err != nil {
    98  		return err
    99  	}
   100  
   101  	switch qc.Action {
   102  	case "create":
   103  		err = quotaCreate(st, t, qc, allGrps, meter, perfTimings)
   104  	case "remove":
   105  		err = quotaRemove(st, t, qc, allGrps, meter, perfTimings)
   106  	case "update":
   107  		err = quotaUpdate(st, t, qc, allGrps, meter, perfTimings)
   108  	default:
   109  		err = fmt.Errorf("unknown action %q requested", qc.Action)
   110  	}
   111  
   112  	return err
   113  }
   114  
   115  func quotaCreate(st *state.State, t *state.Task, action QuotaControlAction, allGrps map[string]*quota.Group, meter progress.Meter, perfTimings *timings.Timings) error {
   116  	// make sure the group does not exist yet
   117  	if _, ok := allGrps[action.QuotaName]; ok {
   118  		return fmt.Errorf("group %q already exists", action.QuotaName)
   119  	}
   120  
   121  	// make sure the memory limit is not zero
   122  	// TODO: this needs to be updated to 4K when PR snapcore/snapd#10346 lands
   123  	// and an equivalent check needs to be put back into CreateQuota() before
   124  	// the tasks are created
   125  	if action.MemoryLimit == 0 {
   126  		return fmt.Errorf("internal error, MemoryLimit option is mandatory for create action")
   127  	}
   128  
   129  	// make sure the memory limit is at least 4K, that is the minimum size
   130  	// to allow nesting, otherwise groups with less than 4K will trigger the
   131  	// oom killer to be invoked when a new group is added as a sub-group to the
   132  	// larger group.
   133  	if action.MemoryLimit <= 4*quantity.SizeKiB {
   134  		return fmt.Errorf("memory limit for group %q is too small: size must be larger than 4KB", action.QuotaName)
   135  	}
   136  
   137  	// make sure the specified snaps exist and aren't currently in another group
   138  	if err := validateSnapForAddingToGroup(st, action.AddSnaps, action.QuotaName, allGrps); err != nil {
   139  		return err
   140  	}
   141  
   142  	grp, allGrps, err := quotaCreateImpl(st, action, allGrps)
   143  	if err != nil {
   144  		return err
   145  	}
   146  
   147  	// ensure the snap services with the group
   148  	opts := &ensureSnapServicesForGroupOptions{
   149  		allGrps: allGrps,
   150  	}
   151  	return ensureSnapServicesForGroup(st, t, grp, opts, meter, perfTimings)
   152  }
   153  
   154  func quotaCreateImpl(st *state.State, action QuotaControlAction, allGrps map[string]*quota.Group) (*quota.Group, map[string]*quota.Group, error) {
   155  	// make sure that the parent group exists if we are creating a sub-group
   156  	var grp *quota.Group
   157  	var err error
   158  	updatedGrps := []*quota.Group{}
   159  	if action.ParentName != "" {
   160  		parentGrp, ok := allGrps[action.ParentName]
   161  		if !ok {
   162  			return nil, nil, fmt.Errorf("cannot create group under non-existent parent group %q", action.ParentName)
   163  		}
   164  
   165  		grp, err = parentGrp.NewSubGroup(action.QuotaName, action.MemoryLimit)
   166  		if err != nil {
   167  			return nil, nil, err
   168  		}
   169  
   170  		updatedGrps = append(updatedGrps, parentGrp)
   171  	} else {
   172  		// make a new group
   173  		grp, err = quota.NewGroup(action.QuotaName, action.MemoryLimit)
   174  		if err != nil {
   175  			return nil, nil, err
   176  		}
   177  	}
   178  	updatedGrps = append(updatedGrps, grp)
   179  
   180  	// put the snaps in the group
   181  	grp.Snaps = action.AddSnaps
   182  	// update the modified groups in state
   183  	newAllGrps, err := patchQuotas(st, updatedGrps...)
   184  	if err != nil {
   185  		return nil, nil, err
   186  	}
   187  
   188  	return grp, newAllGrps, nil
   189  }
   190  
   191  func quotaRemove(st *state.State, t *state.Task, action QuotaControlAction, allGrps map[string]*quota.Group, meter progress.Meter, perfTimings *timings.Timings) error {
   192  	// make sure the group exists
   193  	grp, ok := allGrps[action.QuotaName]
   194  	if !ok {
   195  		return fmt.Errorf("cannot remove non-existent quota group %q", action.QuotaName)
   196  	}
   197  
   198  	// make sure some of the options are not set, it's an internal error if
   199  	// anything other than the name and action are set for a removal
   200  	if action.ParentName != "" {
   201  		return fmt.Errorf("internal error, ParentName option cannot be used with remove action")
   202  	}
   203  
   204  	if len(action.AddSnaps) != 0 {
   205  		return fmt.Errorf("internal error, AddSnaps option cannot be used with remove action")
   206  	}
   207  
   208  	if action.MemoryLimit != 0 {
   209  		return fmt.Errorf("internal error, MemoryLimit option cannot be used with remove action")
   210  	}
   211  
   212  	// XXX: remove this limitation eventually
   213  	if len(grp.SubGroups) != 0 {
   214  		return fmt.Errorf("cannot remove quota group with sub-groups, remove the sub-groups first")
   215  	}
   216  
   217  	// if this group has a parent, we need to remove the linkage to this
   218  	// sub-group from the parent first
   219  	if grp.ParentGroup != "" {
   220  		// the parent here must exist otherwise AllQuotas would have failed
   221  		// because state would have been inconsistent
   222  		parent := allGrps[grp.ParentGroup]
   223  
   224  		// ensure that the parent group of this group no longer mentions this
   225  		// group as a sub-group - we know that it must since AllQuotas validated
   226  		// the state for us
   227  		if len(parent.SubGroups) == 1 {
   228  			// this group was an only child, so clear the whole list
   229  			parent.SubGroups = nil
   230  		} else {
   231  			// we have to delete the child but keep the other children
   232  			newSubgroups := make([]string, 0, len(parent.SubGroups)-1)
   233  			for _, sub := range parent.SubGroups {
   234  				if sub != action.QuotaName {
   235  					newSubgroups = append(newSubgroups, sub)
   236  				}
   237  			}
   238  
   239  			parent.SubGroups = newSubgroups
   240  		}
   241  
   242  		allGrps[grp.ParentGroup] = parent
   243  	}
   244  
   245  	// now delete the group from state - do this first for convenience to ensure
   246  	// that we can just use SnapServiceOptions below and since it operates via
   247  	// state, it will immediately reflect the deletion
   248  	delete(allGrps, action.QuotaName)
   249  
   250  	// make sure that the group set is consistent before saving it - we may need
   251  	// to delete old links from this group's parent to the child
   252  	if err := quota.ResolveCrossReferences(allGrps); err != nil {
   253  		return fmt.Errorf("cannot remove quota %q: %v", action.QuotaName, err)
   254  	}
   255  
   256  	// now set it in state
   257  	st.Set("quotas", allGrps)
   258  
   259  	// update snap service units that may need to be re-written because they are
   260  	// not in a slice anymore
   261  	opts := &ensureSnapServicesForGroupOptions{
   262  		allGrps: allGrps,
   263  	}
   264  	return ensureSnapServicesForGroup(st, t, grp, opts, meter, perfTimings)
   265  }
   266  
   267  func quotaUpdate(st *state.State, t *state.Task, action QuotaControlAction, allGrps map[string]*quota.Group, meter progress.Meter, perfTimings *timings.Timings) error {
   268  	// make sure the group exists
   269  	grp, ok := allGrps[action.QuotaName]
   270  	if !ok {
   271  		return fmt.Errorf("group %q does not exist", action.QuotaName)
   272  	}
   273  
   274  	// check that ParentName is not set, since we don't currently support
   275  	// re-parenting
   276  	if action.ParentName != "" {
   277  		return fmt.Errorf("group %q cannot be moved to a different parent (re-parenting not yet supported)", action.QuotaName)
   278  	}
   279  
   280  	modifiedGrps := []*quota.Group{grp}
   281  
   282  	// now ensure that all of the snaps mentioned in AddSnaps exist as snaps and
   283  	// that they aren't already in an existing quota group
   284  	if err := validateSnapForAddingToGroup(st, action.AddSnaps, action.QuotaName, allGrps); err != nil {
   285  		return err
   286  	}
   287  
   288  	// append the snaps list in the group
   289  	grp.Snaps = append(grp.Snaps, action.AddSnaps...)
   290  
   291  	// if the memory limit is not zero then change it too
   292  	if action.MemoryLimit != 0 {
   293  		// we disallow decreasing the memory limit because it is difficult to do
   294  		// so correctly with the current state of our code in
   295  		// EnsureSnapServices, see comment in ensureSnapServicesForGroup for
   296  		// full details
   297  		if action.MemoryLimit < grp.MemoryLimit {
   298  			return fmt.Errorf("cannot decrease memory limit of existing quota-group, remove and re-create it to decrease the limit")
   299  		}
   300  		grp.MemoryLimit = action.MemoryLimit
   301  	}
   302  
   303  	// update the quota group state
   304  	allGrps, err := patchQuotas(st, modifiedGrps...)
   305  	if err != nil {
   306  		return err
   307  	}
   308  
   309  	// ensure service states are updated
   310  	opts := &ensureSnapServicesForGroupOptions{
   311  		allGrps: allGrps,
   312  	}
   313  	return ensureSnapServicesForGroup(st, t, grp, opts, meter, perfTimings)
   314  }
   315  
// ensureSnapServicesForGroupOptions carries the inputs for
// ensureSnapServicesForGroup other than the group being processed.
type ensureSnapServicesForGroupOptions struct {
	// allGrps is the updated set of quota groups, keyed by group name. A
	// group that is missing from this map is treated as removed by
	// ensureSnapServicesForGroup.
	allGrps map[string]*quota.Group

	// extraSnaps is the set of extra snaps to consider when ensuring services,
	// mainly only used when snaps are removed from quota groups. These are
	// processed in addition to the snaps listed in the group itself.
	extraSnaps []string
}
   324  
   325  // ensureSnapServicesForGroup will handle updating changes to a given quota
   326  // group on disk, including re-generating systemd slice files, restarting snap
   327  // services that have moved into or out of quota groups, as well as starting
   328  // newly created quota groups and stopping and removing removed quota groups.
   329  // This function is idempotent, in that it can be called multiple times with
   330  // the same changes to be processed and nothing will be broken. This is mainly
   331  // a consequence of calling wrappers.EnsureSnapServices().
   332  // Currently, it only supports handling a single group change.
   333  func ensureSnapServicesForGroup(st *state.State, t *state.Task, grp *quota.Group, opts *ensureSnapServicesForGroupOptions, meter progress.Meter, perfTimings *timings.Timings) error {
   334  	if opts == nil {
   335  		return fmt.Errorf("internal error: unset group information for ensuring")
   336  	}
   337  
   338  	allGrps := opts.allGrps
   339  
   340  	if meter == nil {
   341  		meter = progress.Null
   342  	}
   343  
   344  	if perfTimings == nil {
   345  		perfTimings = &timings.Timings{}
   346  	}
   347  
   348  	// extraSnaps []string, meter progress.Meter, perfTimings *timings.Timings
   349  	// build the map of snap infos to options to provide to EnsureSnapServices
   350  	snapSvcMap := map[*snap.Info]*wrappers.SnapServiceOptions{}
   351  	for _, sn := range append(grp.Snaps, opts.extraSnaps...) {
   352  		info, err := snapstate.CurrentInfo(st, sn)
   353  		if err != nil {
   354  			return err
   355  		}
   356  
   357  		opts, err := SnapServiceOptions(st, sn, allGrps)
   358  		if err != nil {
   359  			return err
   360  		}
   361  
   362  		snapSvcMap[info] = opts
   363  	}
   364  
   365  	// TODO: the following lines should maybe be EnsureOptionsForDevice() or
   366  	// something since it is duplicated a few places
   367  	ensureOpts := &wrappers.EnsureSnapServicesOptions{
   368  		Preseeding: snapdenv.Preseeding(),
   369  	}
   370  
   371  	// set RequireMountedSnapdSnap if we are on UC18+ only
   372  	deviceCtx, err := snapstate.DeviceCtx(st, nil, nil)
   373  	if err != nil {
   374  		return err
   375  	}
   376  
   377  	if !deviceCtx.Classic() && deviceCtx.Model().Base() != "" {
   378  		ensureOpts.RequireMountedSnapdSnap = true
   379  	}
   380  
   381  	grpsToStart := []*quota.Group{}
   382  	appsToRestartBySnap := map[*snap.Info][]*snap.AppInfo{}
   383  
   384  	collectModifiedUnits := func(app *snap.AppInfo, grp *quota.Group, unitType string, name, old, new string) {
   385  		switch unitType {
   386  		case "slice":
   387  			// this slice was either modified or written for the first time
   388  
   389  			// There are currently 3 possible cases that have different
   390  			// operations required, but we ignore one of them, so there really
   391  			// are just 2 cases we care about:
   392  			// 1. If this slice was initially written, we just need to systemctl
   393  			//    start it
   394  			// 2. If the slice was modified to be given more resources (i.e. a
   395  			//    higher memory limit), then we just need to do a daemon-reload
   396  			//    which causes systemd to modify the cgroup which will always
   397  			//    work since a cgroup can be atomically given more resources
   398  			//    without issue since the cgroup can't be using more than the
   399  			//    current limit.
   400  			// 3. If the slice was modified to be given _less_ resources (i.e. a
   401  			//    lower memory limit), then we need to stop the services before
   402  			//    issuing the daemon-reload to systemd, then do the
   403  			//    daemon-reload which will succeed in modifying the cgroup, then
   404  			//    start the services we stopped back up again. This is because
   405  			//    otherwise if the services are currently running and using more
   406  			//    resources than they would be allowed after the modification is
   407  			//    applied by systemd to the cgroup, the kernel responds with
   408  			//    EBUSY, and it isn't clear if the modification is then properly
   409  			//    in place or not.
   410  			//
   411  			// We will already have called daemon-reload at the end of
   412  			// EnsureSnapServices directly, so handling case 3 is difficult, and
   413  			// for now we disallow making this sort of change to a quota group,
   414  			// that logic is handled at a higher level than this function.
   415  			// Thus the only decision we really have to make is if the slice was
   416  			// newly written or not, and if it was save it for later
   417  			if old == "" {
   418  				grpsToStart = append(grpsToStart, grp)
   419  			}
   420  
   421  		case "service":
   422  			// in this case, the only way that a service could have been changed
   423  			// was if it was moved into or out of a slice, in both cases we need
   424  			// to restart the service
   425  			sn := app.Snap
   426  			appsToRestartBySnap[sn] = append(appsToRestartBySnap[sn], app)
   427  
   428  			// TODO: what about sockets and timers? activation units just start
   429  			// the full unit, so as long as the full unit is restarted we should
   430  			// be okay?
   431  		}
   432  	}
   433  	if err := wrappers.EnsureSnapServices(snapSvcMap, ensureOpts, collectModifiedUnits, meter); err != nil {
   434  		return err
   435  	}
   436  
   437  	if ensureOpts.Preseeding {
   438  		return nil
   439  	}
   440  
   441  	// TODO: should this logic move to wrappers in wrappers.RemoveQuotaGroup()?
   442  	systemSysd := systemd.New(systemd.SystemMode, meter)
   443  
   444  	// now start the slices
   445  	for _, grp := range grpsToStart {
   446  		// TODO: what should these timeouts for stopping/restart slices be?
   447  		if err := systemSysd.Start(grp.SliceFileName()); err != nil {
   448  			return err
   449  		}
   450  	}
   451  
   452  	// after starting all the grps that we modified from EnsureSnapServices,
   453  	// we need to handle the case where a quota was removed, this will only
   454  	// happen one at a time and can be identified by the grp provided to us
   455  	// not existing in the state
   456  	if _, ok := allGrps[grp.Name]; !ok {
   457  		// stop the quota group, then remove it
   458  		if !ensureOpts.Preseeding {
   459  			if err := systemSysd.Stop(grp.SliceFileName(), 5*time.Second); err != nil {
   460  				logger.Noticef("unable to stop systemd slice while removing group %q: %v", grp.Name, err)
   461  			}
   462  		}
   463  
   464  		// TODO: this results in a second systemctl daemon-reload which is
   465  		// undesirable, we should figure out how to do this operation with a
   466  		// single daemon-reload
   467  		err := wrappers.RemoveQuotaGroup(grp, meter)
   468  		if err != nil {
   469  			return err
   470  		}
   471  	}
   472  
   473  	// after we have made all the persistent modifications to disk and state,
   474  	// set the task as done, what remains for this task handler is just to
   475  	// restart services which will happen regardless if we get rebooted after
   476  	// unlocking the state - if we got rebooted before unlocking the state, none
   477  	// of the changes we made to state would be persisted and we would run
   478  	// through everything above here again, but the second time around
   479  	// EnsureSnapServices would end up doing nothing since it is idempotent.
   480  	if t != nil {
   481  		t.SetStatus(state.DoneStatus)
   482  	}
   483  
   484  	// now restart the services for each snap that was newly moved into a quota
   485  	// group
   486  
   487  	// iterate in a sorted order over the snaps to restart their apps for easy
   488  	// tests
   489  	snaps := make([]*snap.Info, 0, len(appsToRestartBySnap))
   490  	for sn := range appsToRestartBySnap {
   491  		snaps = append(snaps, sn)
   492  	}
   493  
   494  	sort.Slice(snaps, func(i, j int) bool {
   495  		return snaps[i].InstanceName() < snaps[j].InstanceName()
   496  	})
   497  
   498  	for _, sn := range snaps {
   499  		startupOrdered, err := snap.SortServices(appsToRestartBySnap[sn])
   500  		if err != nil {
   501  			return err
   502  		}
   503  
   504  		st.Unlock()
   505  		err = wrappers.RestartServices(startupOrdered, nil, nil, meter, perfTimings)
   506  		st.Lock()
   507  
   508  		if err != nil {
   509  			return err
   510  		}
   511  	}
   512  	return nil
   513  }
   514  
   515  func validateSnapForAddingToGroup(st *state.State, snaps []string, group string, allGrps map[string]*quota.Group) error {
   516  	for _, name := range snaps {
   517  		// validate that the snap exists
   518  		_, err := snapstate.CurrentInfo(st, name)
   519  		if err != nil {
   520  			return fmt.Errorf("cannot use snap %q in group %q: %v", name, group, err)
   521  		}
   522  
   523  		// check that the snap is not already in a group
   524  		for _, grp := range allGrps {
   525  			if strutil.ListContains(grp.Snaps, name) {
   526  				return fmt.Errorf("cannot add snap %q to group %q: snap already in quota group %q", name, group, grp.Name)
   527  			}
   528  		}
   529  	}
   530  
   531  	return nil
   532  }