github.com/david-imola/snapd@v0.0.0-20210611180407-2de8ddeece6d/overlord/servicestate/quota_handlers.go (about)

     1  // -*- Mode: Go; indent-tabs-mode: t -*-
     2  
     3  /*
     4   * Copyright (C) 2021 Canonical Ltd
     5   *
     6   * This program is free software: you can redistribute it and/or modify
     7   * it under the terms of the GNU General Public License version 3 as
     8   * published by the Free Software Foundation.
     9   *
    10   * This program is distributed in the hope that it will be useful,
    11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
    12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13   * GNU General Public License for more details.
    14   *
    15   * You should have received a copy of the GNU General Public License
    16   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17   *
    18   */
    19  
    20  package servicestate
    21  
    22  import (
    23  	"fmt"
    24  	"sort"
    25  	"time"
    26  
    27  	tomb "gopkg.in/tomb.v2"
    28  
    29  	"github.com/snapcore/snapd/gadget/quantity"
    30  	"github.com/snapcore/snapd/logger"
    31  	"github.com/snapcore/snapd/overlord/snapstate"
    32  	"github.com/snapcore/snapd/overlord/state"
    33  	"github.com/snapcore/snapd/progress"
    34  	"github.com/snapcore/snapd/snap"
    35  	"github.com/snapcore/snapd/snap/quota"
    36  	"github.com/snapcore/snapd/snapdenv"
    37  	"github.com/snapcore/snapd/strutil"
    38  	"github.com/snapcore/snapd/systemd"
    39  	"github.com/snapcore/snapd/timings"
    40  	"github.com/snapcore/snapd/wrappers"
    41  )
    42  
    43  // QuotaControlAction is the serialized representation of a quota group
    44  // modification that lives in a task.
    45  type QuotaControlAction struct {
    46  	// QuotaName is the name of the quota group being controlled.
    47  	QuotaName string `json:"quota-name"`
    48  
    49  	// Action is the action being taken on the quota group. It can be either
    50  	// "create", "update", or "remove".
    51  	Action string `json:"action"`
    52  
    53  	// AddSnaps is the set of snaps to add to the quota group, valid for either
    54  	// the "update" or the "create" actions.
    55  	AddSnaps []string `json:"snaps"`
    56  
    57  	// MemoryLimit is the memory limit for the quota group being controlled,
    58  	// either the initial limit the group is created with for the "create"
    59  	// action, or if non-zero for the "update" the memory limit, then the new
    60  	// value to be set.
    61  	MemoryLimit quantity.Size
    62  
    63  	// ParentName is the name of the parent for the quota group if it is being
    64  	// created. Eventually this could be used with the "update" action to
    65  	// support moving quota groups from one parent to another, but that is
    66  	// currently not supported.
    67  	ParentName string
    68  }
    69  
    70  func (m *ServiceManager) doQuotaControl(t *state.Task, _ *tomb.Tomb) error {
    71  	st := t.State()
    72  	st.Lock()
    73  	defer st.Unlock()
    74  
    75  	perfTimings := state.TimingsForTask(t)
    76  	defer perfTimings.Save(st)
    77  
    78  	meter := snapstate.NewTaskProgressAdapterUnlocked(t)
    79  
    80  	qcs := []QuotaControlAction{}
    81  	err := t.Get("quota-control-actions", &qcs)
    82  	if err != nil {
    83  		return fmt.Errorf("internal error: cannot get quota-control-action: %v", err)
    84  	}
    85  
    86  	// TODO: support more than one action
    87  	switch {
    88  	case len(qcs) > 1:
    89  		return fmt.Errorf("multiple quota group actions not supported yet")
    90  	case len(qcs) == 0:
    91  		return fmt.Errorf("internal error: no quota group actions for quota-control task")
    92  	}
    93  
    94  	qc := qcs[0]
    95  
    96  	allGrps, err := AllQuotas(st)
    97  	if err != nil {
    98  		return err
    99  	}
   100  
   101  	switch qc.Action {
   102  	case "create":
   103  		err = quotaCreate(st, t, qc, allGrps, meter, perfTimings)
   104  	case "remove":
   105  		err = quotaRemove(st, t, qc, allGrps, meter, perfTimings)
   106  	case "update":
   107  		err = quotaUpdate(st, t, qc, allGrps, meter, perfTimings)
   108  	default:
   109  		err = fmt.Errorf("unknown action %q requested", qc.Action)
   110  	}
   111  
   112  	return err
   113  }
   114  
   115  func quotaCreate(st *state.State, t *state.Task, action QuotaControlAction, allGrps map[string]*quota.Group, meter progress.Meter, perfTimings *timings.Timings) error {
   116  	// make sure the group does not exist yet
   117  	if _, ok := allGrps[action.QuotaName]; ok {
   118  		return fmt.Errorf("group %q already exists", action.QuotaName)
   119  	}
   120  
   121  	// make sure the memory limit is not zero
   122  	// TODO: this needs to be updated to 4K when PR snapcore/snapd#10346 lands
   123  	// and an equivalent check needs to be put back into CreateQuota() before
   124  	// the tasks are created
   125  	if action.MemoryLimit == 0 {
   126  		return fmt.Errorf("internal error, MemoryLimit option is mandatory for create action")
   127  	}
   128  
   129  	// make sure the specified snaps exist and aren't currently in another group
   130  	if err := validateSnapForAddingToGroup(st, action.AddSnaps, action.QuotaName, allGrps); err != nil {
   131  		return err
   132  	}
   133  
   134  	grp, allGrps, err := quotaCreateImpl(st, action, allGrps)
   135  	if err != nil {
   136  		return err
   137  	}
   138  
   139  	// ensure the snap services with the group
   140  	opts := &ensureSnapServicesForGroupOptions{
   141  		allGrps: allGrps,
   142  	}
   143  	return ensureSnapServicesForGroup(st, t, grp, opts, meter, perfTimings)
   144  }
   145  
   146  func quotaCreateImpl(st *state.State, action QuotaControlAction, allGrps map[string]*quota.Group) (*quota.Group, map[string]*quota.Group, error) {
   147  	// make sure that the parent group exists if we are creating a sub-group
   148  	var grp *quota.Group
   149  	var err error
   150  	updatedGrps := []*quota.Group{}
   151  	if action.ParentName != "" {
   152  		parentGrp, ok := allGrps[action.ParentName]
   153  		if !ok {
   154  			return nil, nil, fmt.Errorf("cannot create group under non-existent parent group %q", action.ParentName)
   155  		}
   156  
   157  		grp, err = parentGrp.NewSubGroup(action.QuotaName, action.MemoryLimit)
   158  		if err != nil {
   159  			return nil, nil, err
   160  		}
   161  
   162  		updatedGrps = append(updatedGrps, parentGrp)
   163  	} else {
   164  		// make a new group
   165  		grp, err = quota.NewGroup(action.QuotaName, action.MemoryLimit)
   166  		if err != nil {
   167  			return nil, nil, err
   168  		}
   169  	}
   170  	updatedGrps = append(updatedGrps, grp)
   171  
   172  	// put the snaps in the group
   173  	grp.Snaps = action.AddSnaps
   174  	// update the modified groups in state
   175  	newAllGrps, err := patchQuotas(st, updatedGrps...)
   176  	if err != nil {
   177  		return nil, nil, err
   178  	}
   179  
   180  	return grp, newAllGrps, nil
   181  }
   182  
   183  func quotaRemove(st *state.State, t *state.Task, action QuotaControlAction, allGrps map[string]*quota.Group, meter progress.Meter, perfTimings *timings.Timings) error {
   184  	// make sure the group exists
   185  	grp, ok := allGrps[action.QuotaName]
   186  	if !ok {
   187  		return fmt.Errorf("cannot remove non-existent quota group %q", action.QuotaName)
   188  	}
   189  
   190  	// make sure some of the options are not set, it's an internal error if
   191  	// anything other than the name and action are set for a removal
   192  	if action.ParentName != "" {
   193  		return fmt.Errorf("internal error, ParentName option cannot be used with remove action")
   194  	}
   195  
   196  	if len(action.AddSnaps) != 0 {
   197  		return fmt.Errorf("internal error, AddSnaps option cannot be used with remove action")
   198  	}
   199  
   200  	if action.MemoryLimit != 0 {
   201  		return fmt.Errorf("internal error, MemoryLimit option cannot be used with remove action")
   202  	}
   203  
   204  	// XXX: remove this limitation eventually
   205  	if len(grp.SubGroups) != 0 {
   206  		return fmt.Errorf("cannot remove quota group with sub-groups, remove the sub-groups first")
   207  	}
   208  
   209  	// if this group has a parent, we need to remove the linkage to this
   210  	// sub-group from the parent first
   211  	if grp.ParentGroup != "" {
   212  		// the parent here must exist otherwise AllQuotas would have failed
   213  		// because state would have been inconsistent
   214  		parent := allGrps[grp.ParentGroup]
   215  
   216  		// ensure that the parent group of this group no longer mentions this
   217  		// group as a sub-group - we know that it must since AllQuotas validated
   218  		// the state for us
   219  		if len(parent.SubGroups) == 1 {
   220  			// this group was an only child, so clear the whole list
   221  			parent.SubGroups = nil
   222  		} else {
   223  			// we have to delete the child but keep the other children
   224  			newSubgroups := make([]string, 0, len(parent.SubGroups)-1)
   225  			for _, sub := range parent.SubGroups {
   226  				if sub != action.QuotaName {
   227  					newSubgroups = append(newSubgroups, sub)
   228  				}
   229  			}
   230  
   231  			parent.SubGroups = newSubgroups
   232  		}
   233  
   234  		allGrps[grp.ParentGroup] = parent
   235  	}
   236  
   237  	// now delete the group from state - do this first for convenience to ensure
   238  	// that we can just use SnapServiceOptions below and since it operates via
   239  	// state, it will immediately reflect the deletion
   240  	delete(allGrps, action.QuotaName)
   241  
   242  	// make sure that the group set is consistent before saving it - we may need
   243  	// to delete old links from this group's parent to the child
   244  	if err := quota.ResolveCrossReferences(allGrps); err != nil {
   245  		return fmt.Errorf("cannot remove quota %q: %v", action.QuotaName, err)
   246  	}
   247  
   248  	// now set it in state
   249  	st.Set("quotas", allGrps)
   250  
   251  	// update snap service units that may need to be re-written because they are
   252  	// not in a slice anymore
   253  	opts := &ensureSnapServicesForGroupOptions{
   254  		allGrps: allGrps,
   255  	}
   256  	return ensureSnapServicesForGroup(st, t, grp, opts, meter, perfTimings)
   257  }
   258  
   259  func quotaUpdate(st *state.State, t *state.Task, action QuotaControlAction, allGrps map[string]*quota.Group, meter progress.Meter, perfTimings *timings.Timings) error {
   260  	// make sure the group exists
   261  	grp, ok := allGrps[action.QuotaName]
   262  	if !ok {
   263  		return fmt.Errorf("group %q does not exist", action.QuotaName)
   264  	}
   265  
   266  	// check that ParentName is not set, since we don't currently support
   267  	// re-parenting
   268  	if action.ParentName != "" {
   269  		return fmt.Errorf("group %q cannot be moved to a different parent (re-parenting not yet supported)", action.QuotaName)
   270  	}
   271  
   272  	modifiedGrps := []*quota.Group{grp}
   273  
   274  	// now ensure that all of the snaps mentioned in AddSnaps exist as snaps and
   275  	// that they aren't already in an existing quota group
   276  	if err := validateSnapForAddingToGroup(st, action.AddSnaps, action.QuotaName, allGrps); err != nil {
   277  		return err
   278  	}
   279  
   280  	// append the snaps list in the group
   281  	grp.Snaps = append(grp.Snaps, action.AddSnaps...)
   282  
   283  	// if the memory limit is not zero then change it too
   284  	if action.MemoryLimit != 0 {
   285  		// we disallow decreasing the memory limit because it is difficult to do
   286  		// so correctly with the current state of our code in
   287  		// EnsureSnapServices, see comment in ensureSnapServicesForGroup for
   288  		// full details
   289  		if action.MemoryLimit < grp.MemoryLimit {
   290  			return fmt.Errorf("cannot decrease memory limit of existing quota-group, remove and re-create it to decrease the limit")
   291  		}
   292  		grp.MemoryLimit = action.MemoryLimit
   293  	}
   294  
   295  	// update the quota group state
   296  	allGrps, err := patchQuotas(st, modifiedGrps...)
   297  	if err != nil {
   298  		return err
   299  	}
   300  
   301  	// ensure service states are updated
   302  	opts := &ensureSnapServicesForGroupOptions{
   303  		allGrps: allGrps,
   304  	}
   305  	return ensureSnapServicesForGroup(st, t, grp, opts, meter, perfTimings)
   306  }
   307  
   308  type ensureSnapServicesForGroupOptions struct {
   309  	// allGrps is the updated set of quota groups
   310  	allGrps map[string]*quota.Group
   311  
   312  	// extraSnaps is the set of extra snaps to consider when ensuring services,
   313  	// mainly only used when snaps are removed from quota groups
   314  	extraSnaps []string
   315  }
   316  
   317  // ensureSnapServicesForGroup will handle updating changes to a given quota
   318  // group on disk, including re-generating systemd slice files, restarting snap
   319  // services that have moved into or out of quota groups, as well as starting
   320  // newly created quota groups and stopping and removing removed quota groups.
   321  // This function is idempotent, in that it can be called multiple times with
   322  // the same changes to be processed and nothing will be broken. This is mainly
   323  // a consequence of calling wrappers.EnsureSnapServices().
   324  // Currently, it only supports handling a single group change.
   325  func ensureSnapServicesForGroup(st *state.State, t *state.Task, grp *quota.Group, opts *ensureSnapServicesForGroupOptions, meter progress.Meter, perfTimings *timings.Timings) error {
   326  	if opts == nil {
   327  		return fmt.Errorf("internal error: unset group information for ensuring")
   328  	}
   329  
   330  	allGrps := opts.allGrps
   331  
   332  	if meter == nil {
   333  		meter = progress.Null
   334  	}
   335  
   336  	if perfTimings == nil {
   337  		perfTimings = &timings.Timings{}
   338  	}
   339  
   340  	// extraSnaps []string, meter progress.Meter, perfTimings *timings.Timings
   341  	// build the map of snap infos to options to provide to EnsureSnapServices
   342  	snapSvcMap := map[*snap.Info]*wrappers.SnapServiceOptions{}
   343  	for _, sn := range append(grp.Snaps, opts.extraSnaps...) {
   344  		info, err := snapstate.CurrentInfo(st, sn)
   345  		if err != nil {
   346  			return err
   347  		}
   348  
   349  		opts, err := SnapServiceOptions(st, sn, allGrps)
   350  		if err != nil {
   351  			return err
   352  		}
   353  
   354  		snapSvcMap[info] = opts
   355  	}
   356  
   357  	// TODO: the following lines should maybe be EnsureOptionsForDevice() or
   358  	// something since it is duplicated a few places
   359  	ensureOpts := &wrappers.EnsureSnapServicesOptions{
   360  		Preseeding: snapdenv.Preseeding(),
   361  	}
   362  
   363  	// set RequireMountedSnapdSnap if we are on UC18+ only
   364  	deviceCtx, err := snapstate.DeviceCtx(st, nil, nil)
   365  	if err != nil {
   366  		return err
   367  	}
   368  
   369  	if !deviceCtx.Classic() && deviceCtx.Model().Base() != "" {
   370  		ensureOpts.RequireMountedSnapdSnap = true
   371  	}
   372  
   373  	grpsToStart := []*quota.Group{}
   374  	appsToRestartBySnap := map[*snap.Info][]*snap.AppInfo{}
   375  
   376  	collectModifiedUnits := func(app *snap.AppInfo, grp *quota.Group, unitType string, name, old, new string) {
   377  		switch unitType {
   378  		case "slice":
   379  			// this slice was either modified or written for the first time
   380  
   381  			// There are currently 3 possible cases that have different
   382  			// operations required, but we ignore one of them, so there really
   383  			// are just 2 cases we care about:
   384  			// 1. If this slice was initially written, we just need to systemctl
   385  			//    start it
   386  			// 2. If the slice was modified to be given more resources (i.e. a
   387  			//    higher memory limit), then we just need to do a daemon-reload
   388  			//    which causes systemd to modify the cgroup which will always
   389  			//    work since a cgroup can be atomically given more resources
   390  			//    without issue since the cgroup can't be using more than the
   391  			//    current limit.
   392  			// 3. If the slice was modified to be given _less_ resources (i.e. a
   393  			//    lower memory limit), then we need to stop the services before
   394  			//    issuing the daemon-reload to systemd, then do the
   395  			//    daemon-reload which will succeed in modifying the cgroup, then
   396  			//    start the services we stopped back up again. This is because
   397  			//    otherwise if the services are currently running and using more
   398  			//    resources than they would be allowed after the modification is
   399  			//    applied by systemd to the cgroup, the kernel responds with
   400  			//    EBUSY, and it isn't clear if the modification is then properly
   401  			//    in place or not.
   402  			//
   403  			// We will already have called daemon-reload at the end of
   404  			// EnsureSnapServices directly, so handling case 3 is difficult, and
   405  			// for now we disallow making this sort of change to a quota group,
   406  			// that logic is handled at a higher level than this function.
   407  			// Thus the only decision we really have to make is if the slice was
   408  			// newly written or not, and if it was save it for later
   409  			if old == "" {
   410  				grpsToStart = append(grpsToStart, grp)
   411  			}
   412  
   413  		case "service":
   414  			// in this case, the only way that a service could have been changed
   415  			// was if it was moved into or out of a slice, in both cases we need
   416  			// to restart the service
   417  			sn := app.Snap
   418  			appsToRestartBySnap[sn] = append(appsToRestartBySnap[sn], app)
   419  
   420  			// TODO: what about sockets and timers? activation units just start
   421  			// the full unit, so as long as the full unit is restarted we should
   422  			// be okay?
   423  		}
   424  	}
   425  	if err := wrappers.EnsureSnapServices(snapSvcMap, ensureOpts, collectModifiedUnits, meter); err != nil {
   426  		return err
   427  	}
   428  
   429  	if ensureOpts.Preseeding {
   430  		return nil
   431  	}
   432  
   433  	// TODO: should this logic move to wrappers in wrappers.RemoveQuotaGroup()?
   434  	systemSysd := systemd.New(systemd.SystemMode, meter)
   435  
   436  	// now start the slices
   437  	for _, grp := range grpsToStart {
   438  		// TODO: what should these timeouts for stopping/restart slices be?
   439  		if err := systemSysd.Start(grp.SliceFileName()); err != nil {
   440  			return err
   441  		}
   442  	}
   443  
   444  	// after starting all the grps that we modified from EnsureSnapServices,
   445  	// we need to handle the case where a quota was removed, this will only
   446  	// happen one at a time and can be identified by the grp provided to us
   447  	// not existing in the state
   448  	if _, ok := allGrps[grp.Name]; !ok {
   449  		// stop the quota group, then remove it
   450  		if !ensureOpts.Preseeding {
   451  			if err := systemSysd.Stop(grp.SliceFileName(), 5*time.Second); err != nil {
   452  				logger.Noticef("unable to stop systemd slice while removing group %q: %v", grp.Name, err)
   453  			}
   454  		}
   455  
   456  		// TODO: this results in a second systemctl daemon-reload which is
   457  		// undesirable, we should figure out how to do this operation with a
   458  		// single daemon-reload
   459  		err := wrappers.RemoveQuotaGroup(grp, meter)
   460  		if err != nil {
   461  			return err
   462  		}
   463  	}
   464  
   465  	// after we have made all the persistent modifications to disk and state,
   466  	// set the task as done, what remains for this task handler is just to
   467  	// restart services which will happen regardless if we get rebooted after
   468  	// unlocking the state - if we got rebooted before unlocking the state, none
   469  	// of the changes we made to state would be persisted and we would run
   470  	// through everything above here again, but the second time around
   471  	// EnsureSnapServices would end up doing nothing since it is idempotent.
   472  	if t != nil {
   473  		t.SetStatus(state.DoneStatus)
   474  	}
   475  
   476  	// now restart the services for each snap that was newly moved into a quota
   477  	// group
   478  
   479  	// iterate in a sorted order over the snaps to restart their apps for easy
   480  	// tests
   481  	snaps := make([]*snap.Info, 0, len(appsToRestartBySnap))
   482  	for sn := range appsToRestartBySnap {
   483  		snaps = append(snaps, sn)
   484  	}
   485  
   486  	sort.Slice(snaps, func(i, j int) bool {
   487  		return snaps[i].InstanceName() < snaps[j].InstanceName()
   488  	})
   489  
   490  	for _, sn := range snaps {
   491  		st.Unlock()
   492  		disabledSvcs, err := wrappers.QueryDisabledServices(sn, meter)
   493  		st.Lock()
   494  		if err != nil {
   495  			return err
   496  		}
   497  
   498  		isDisabledSvc := make(map[string]bool, len(disabledSvcs))
   499  		for _, svc := range disabledSvcs {
   500  			isDisabledSvc[svc] = true
   501  		}
   502  
   503  		startupOrdered, err := snap.SortServices(appsToRestartBySnap[sn])
   504  		if err != nil {
   505  			return err
   506  		}
   507  
   508  		// drop disabled services from the startup ordering
   509  		startupOrderedMinusDisabled := make([]*snap.AppInfo, 0, len(startupOrdered)-len(disabledSvcs))
   510  
   511  		for _, svc := range startupOrdered {
   512  			if !isDisabledSvc[svc.ServiceName()] {
   513  				startupOrderedMinusDisabled = append(startupOrderedMinusDisabled, svc)
   514  			}
   515  		}
   516  
   517  		st.Unlock()
   518  		err = wrappers.RestartServices(startupOrderedMinusDisabled, nil, meter, perfTimings)
   519  		st.Lock()
   520  
   521  		if err != nil {
   522  			return err
   523  		}
   524  	}
   525  	return nil
   526  }
   527  
   528  func validateSnapForAddingToGroup(st *state.State, snaps []string, group string, allGrps map[string]*quota.Group) error {
   529  	for _, name := range snaps {
   530  		// validate that the snap exists
   531  		_, err := snapstate.CurrentInfo(st, name)
   532  		if err != nil {
   533  			return fmt.Errorf("cannot use snap %q in group %q: %v", name, group, err)
   534  		}
   535  
   536  		// check that the snap is not already in a group
   537  		for _, grp := range allGrps {
   538  			if strutil.ListContains(grp.Snaps, name) {
   539  				return fmt.Errorf("cannot add snap %q to group %q: snap already in quota group %q", name, group, grp.Name)
   540  			}
   541  		}
   542  	}
   543  
   544  	return nil
   545  }