gitee.com/mysnapcore/mysnapd@v0.1.0/snap/quota/quota.go (about)

     1  // -*- Mode: Go; indent-tabs-mode: t -*-
     2  
     3  /*
     4   * Copyright (C) 2021 Canonical Ltd
     5   *
     6   * This program is free software: you can redistribute it and/or modify
     7   * it under the terms of the GNU General Public License version 3 as
     8   * published by the Free Software Foundation.
     9   *
    10   * This program is distributed in the hope that it will be useful,
    11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
    12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13   * GNU General Public License for more details.
    14   *
    15   * You should have received a copy of the GNU General Public License
    16   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17   *
    18   */
    19  
    20  // Package quota defines state structures for resource quota groups
    21  // for snaps.
    22  package quota
    23  
    24  import (
    25  	"bytes"
    26  	"fmt"
    27  	"path/filepath"
    28  	"runtime"
    29  	"sort"
    30  	"time"
    31  
    32  	// TODO: move this to snap/quantity? or similar
    33  	"gitee.com/mysnapcore/mysnapd/dirs"
    34  	"gitee.com/mysnapcore/mysnapd/gadget/quantity"
    35  	"gitee.com/mysnapcore/mysnapd/progress"
    36  	"gitee.com/mysnapcore/mysnapd/snap/naming"
    37  	"gitee.com/mysnapcore/mysnapd/systemd"
    38  )
    39  
// runtimeNumCPU is an indirection over runtime.NumCPU, kept in a package-level
// variable so that it can be replaced in tests.
var runtimeNumCPU = runtime.NumCPU
    42  
// GroupQuotaCPU contains the different knobs that can be tuned
// for cpu quota limits. The allowed CPU percentage to use is split across two limits
// to better support an intuitive way of setting the limits.
type GroupQuotaCPU struct {
	// Count is the multiplier that is used in combination with the
	// percentage parameter to determine the final CPU resource constraint value.
	// The value is a positive integer or 0. When Count is 0, GetLocalCPUQuota
	// substitutes the number of CPU cores available to the group (the size of
	// the effective CPU set if it is smaller than the system core count,
	// otherwise the system core count).
	Count int `json:"count,omitempty"`

	// Percentage is a positive integer between 0 and 100. It is used together with
	// the Count parameter to determine the final CPU resource constraint value. The value
	// written to the systemd slice will be Count*Percentage. A value of 0 means that the limit
	// in Percentage and Count is ignored.
	Percentage int `json:"percentage,omitempty"`

	// CPUSet is a list of CPU core indices that are allowed to be used by the group. Each value
	// in the list refers to the CPU core number. If the list is empty, all CPU cores are allowed.
	CPUSet []int `json:"allowed-cpus,omitempty"`
}
    62  
// GroupQuotaJournal contains the supported limits for journald. Any limit set here
// applies only to the quota group itself. Journal limits will not be inherited by the
// sub-groups as this behaviour is not supported by systemd.
type GroupQuotaJournal struct {
	// Size is the maximum allowed size of the journal for the group.
	// If the size is set below current usage, systemd will automatically treat
	// the current usage of the journald namespace as the minimum limit and
	// render whatever is set here ineffective. The maximum allowed size for
	// journald namespaces is 4GB. A value of 0 here means no limit is present.
	Size quantity.Size `json:"size,omitempty"`

	// RateEnabled tells us whether or not the values provided in RateCount and
	// RatePeriod should be written.
	RateEnabled bool `json:"rate-enabled,omitempty"`
	// RateCount is the number of messages allowed each RatePeriod. A zero value
	// in this field will disable the rate-limit.
	RateCount int `json:"rate-count,omitempty"`
	// RatePeriod is the time period after which the rate resets. Each RatePeriod,
	// RateCount messages are allowed. A zero value in this field will
	// disable the rate-limit.
	RatePeriod time.Duration `json:"rate-period,omitempty"`
}
    85  
// Group is a quota group of snaps, services or sub-groups that are all subject
// to specific resource quotas. The quota resource types carried by this
// structure are memory, CPU (percentage and allowed cores), threads and
// journal limits.
type Group struct {
	// Name is the name of the quota group. This name is used as the
	// name of the systemd slice underlying the quota group.
	// Certain names are reserved for future use: system, snapd, root, user.
	// Otherwise names following the same rules as snap names can be used.
	Name string `json:"name,omitempty"`

	// SubGroups is the set of sub-groups that are subject to this quota.
	// Sub-groups have their own limits, subject to the requirement that the
	// highest quota for a sub-group is that of the parent group.
	SubGroups []string `json:"sub-groups,omitempty"`

	// subGroups is the set of actual sub-group objects, needed for tracking and
	// calculations. It is unexported and therefore not serialized to JSON.
	subGroups []*Group

	// MemoryLimit is the limit of memory available to the processes in the
	// group where if the total used memory of all the processes exceeds the
	// limit, oom-killer is invoked which will start killing processes. The
	// specific behavior of which processes are killed is subject to the
	// ExhaustionBehavior. MemoryLimit is expressed in bytes.
	MemoryLimit quantity.Size `json:"memory-limit,omitempty"`

	// CPULimit is the quota for the cpu and consists of a couple of knobs.
	// It is possible to control the percentage of the cpu available for the group
	// and which cores (requires cgroupsv2) are allowed to be used.
	CPULimit *GroupQuotaCPU `json:"cpu-limit,omitempty"`

	// ThreadLimit is the limit of threads/processes that can be active at once in
	// the group. Once the limit is reached, further forks() or clones() will be blocked
	// for processes in the group.
	ThreadLimit int `json:"task-limit,omitempty"`

	// JournalLimit is the set of limits that apply to the journal for this quota group. When
	// this limit is present, then the quota group will be assigned a log namespace for
	// journald.
	JournalLimit *GroupQuotaJournal `json:"journal-limit,omitempty"`

	// ParentGroup is the parent group that this group is a child of. If it
	// is empty, then this is a "root" quota group.
	ParentGroup string `json:"parent-group,omitempty"`

	// parentGroup is the actual parent group object, needed for tracking and
	// calculations. It is unexported and therefore not serialized to JSON.
	parentGroup *Group

	// Snaps is the set of snaps that is part of this quota group. If this is
	// empty then the underlying slice may not exist on the system.
	Snaps []string `json:"snaps,omitempty"`
}
   139  
   140  // NewGroup creates a new top quota group with the given name and memory limit.
   141  func NewGroup(name string, resourceLimits Resources) (*Group, error) {
   142  	grp := &Group{
   143  		Name: name,
   144  	}
   145  
   146  	if err := grp.UpdateQuotaLimits(resourceLimits); err != nil {
   147  		return nil, err
   148  	}
   149  
   150  	if err := grp.validate(); err != nil {
   151  		return nil, err
   152  	}
   153  
   154  	return grp, nil
   155  }
   156  
   157  func (grp *Group) GetQuotaResources() Resources {
   158  	resourcesBuilder := NewResourcesBuilder()
   159  	if grp.MemoryLimit != 0 {
   160  		resourcesBuilder.WithMemoryLimit(grp.MemoryLimit)
   161  	}
   162  	if grp.CPULimit != nil {
   163  		if grp.CPULimit.Count != 0 {
   164  			resourcesBuilder.WithCPUCount(grp.CPULimit.Count)
   165  		}
   166  		if grp.CPULimit.Percentage != 0 {
   167  			resourcesBuilder.WithCPUPercentage(grp.CPULimit.Percentage)
   168  		}
   169  		if len(grp.CPULimit.CPUSet) != 0 {
   170  			resourcesBuilder.WithCPUSet(grp.CPULimit.CPUSet)
   171  		}
   172  	}
   173  	if grp.ThreadLimit != 0 {
   174  		resourcesBuilder.WithThreadLimit(grp.ThreadLimit)
   175  	}
   176  	if grp.JournalLimit != nil {
   177  		resourcesBuilder.WithJournalNamespace()
   178  		if grp.JournalLimit.Size != 0 {
   179  			resourcesBuilder.WithJournalSize(grp.JournalLimit.Size)
   180  		}
   181  		// We cannot just check for RateCount and RatePeriod and call WithJournalRate()
   182  		// only if both are non-zero, because not calling WithJournalRate() causes the
   183  		// system's default rate count and rate period to be used; what we really want
   184  		// here is to be able to completely disable the rate-limit for a journal quota.
   185  		if grp.JournalLimit.RateEnabled {
   186  			resourcesBuilder.WithJournalRate(grp.JournalLimit.RateCount, grp.JournalLimit.RatePeriod)
   187  		}
   188  	}
   189  	return resourcesBuilder.Build()
   190  }
   191  
   192  // CurrentMemoryUsage returns the current memory usage of the quota group. For
   193  // quota groups which do not yet have a backing systemd slice on the system (
   194  // i.e. quota groups without any snaps in them), the memory usage is reported as
   195  // 0.
   196  func (grp *Group) CurrentMemoryUsage() (quantity.Size, error) {
   197  	sysd := systemd.New(systemd.SystemMode, progress.Null)
   198  
   199  	// check if this group is actually active, it could not physically exist yet
   200  	// since it has no snaps in it
   201  	isActive, err := sysd.IsActive(grp.SliceFileName())
   202  	if err != nil {
   203  		return 0, err
   204  	}
   205  	if !isActive {
   206  		return 0, nil
   207  	}
   208  
   209  	mem, err := sysd.CurrentMemoryUsage(grp.SliceFileName())
   210  	if err != nil {
   211  		return 0, err
   212  	}
   213  
   214  	return mem, nil
   215  }
   216  
   217  // CurrentTaskUsage returns the current task (processes, threads) usage of the quota group.
   218  // For quota groups which do not yet have a backing systemd slice on the system (
   219  // i.e. quota groups without any snaps in them), the task usage is reported
   220  // as 0
   221  func (grp *Group) CurrentTaskUsage() (int, error) {
   222  	sysd := systemd.New(systemd.SystemMode, progress.Null)
   223  
   224  	// check if this group is actually active, it could not physically exist yet
   225  	// since it has no snaps in it
   226  	isActive, err := sysd.IsActive(grp.SliceFileName())
   227  	if err != nil {
   228  		return 0, err
   229  	}
   230  	if !isActive {
   231  		return 0, nil
   232  	}
   233  
   234  	count, err := sysd.CurrentTasksCount(grp.SliceFileName())
   235  	if err != nil {
   236  		return 0, err
   237  	}
   238  	return int(count), nil
   239  }
   240  
   241  // SliceFileName returns the name of the slice file that should be used for this
   242  // quota group. This name will include all of the group's parents in the name.
   243  // For example, a group named "bar" that is a child of the "foo" group will have
   244  // a systemd slice name as "snap.foo-bar.slice". Note that the slice name may
   245  // differ from the snapd friendly group name, mainly in the case that the group
   246  // is a sub group.
   247  func (grp *Group) SliceFileName() string {
   248  	escapedGrpName := systemd.EscapeUnitNamePath(grp.Name)
   249  	if grp.ParentGroup == "" {
   250  		// root group name, then the slice unit is just "<name>.slice"
   251  		return fmt.Sprintf("snap.%s.slice", escapedGrpName)
   252  	}
   253  
   254  	// otherwise we need to track back to get all of the parent elements
   255  	grpNames := []string{}
   256  	parentGrp := grp.parentGroup
   257  	for parentGrp != nil {
   258  		grpNames = append([]string{parentGrp.Name}, grpNames...)
   259  		parentGrp = parentGrp.parentGroup
   260  	}
   261  
   262  	buf := &bytes.Buffer{}
   263  	fmt.Fprintf(buf, "snap.")
   264  	for _, parentGrpName := range grpNames {
   265  		fmt.Fprintf(buf, "%s-", systemd.EscapeUnitNamePath(parentGrpName))
   266  	}
   267  	fmt.Fprintf(buf, "%s.slice", escapedGrpName)
   268  	return buf.String()
   269  }
   270  
   271  // JournalNamespaceName returns the snap formatted name of the log namespace
   272  func (grp *Group) JournalNamespaceName() string {
   273  	return fmt.Sprintf("snap-%s", grp.Name)
   274  }
   275  
   276  // JournalConfFileName returns the name of the journal configuration file that should
   277  // be used for this quota group. As an example, a group named "foo" will return a name
   278  // of journald@snap-foo.conf
   279  func (grp *Group) JournalConfFileName() string {
   280  	return fmt.Sprintf("journald@%s.conf", grp.JournalNamespaceName())
   281  }
   282  
   283  // JournalServiceName returns the systemd service name for the quota group.
   284  func (grp *Group) JournalServiceName() string {
   285  	return fmt.Sprintf("systemd-journald@%s.service", grp.JournalNamespaceName())
   286  }
   287  
   288  // JournalServiceFile returns the directory specific to this quota group for
   289  // its journal service unit drop-in.
   290  func (grp *Group) JournalServiceDropInDir() string {
   291  	return filepath.Join(dirs.SnapServicesDir, grp.JournalServiceName()+".d")
   292  }
   293  
   294  // JournalServiceDropInFile returns the full path to the journal service unit drop-in
   295  // file for the quota group.
   296  func (grp *Group) JournalServiceDropInFile() string {
   297  	return filepath.Join(grp.JournalServiceDropInDir(), "00-snap.conf")
   298  }
   299  
// groupQuotaAllocations contains information about the current quotas of a group
// and is filled in by getQuotaAllocations. It only accounts for quotas that
// support inheritance, which currently does not include journal quotas.
// There are two values for each quota: the limit set by this group itself
// (*Limit), and the amount reserved by children of this group
// (*ReservedByChildren). Examples, using memory:
//
// Group that has a non-memory quota, but has a child group with a memory quota of 512mb:
//
//	MemoryLimit = 0
//	MemoryReservedByChildren = 512 mb
//
// Group that has a memory quota of 512mb, but only has children groups with non-memory quotas:
//
//	MemoryLimit = 512 mb
//	MemoryReservedByChildren = 0
//
// Group that has a memory quota of 512mb, and has a child group with a memory quota of 256mb:
//
//	MemoryLimit = 512 mb
//	MemoryReservedByChildren = 256 mb
//
// If the limit value is non-zero, then the reserved value can never be greater than the limit. However,
// if the limit is zero, then the reserved value must be below the nearest non-zero limit as you traverse
// up the tree.
type groupQuotaAllocations struct {
	MemoryLimit              quantity.Size
	MemoryReservedByChildren quantity.Size

	CPULimit              int
	CPUReservedByChildren int

	ThreadsLimit              int
	ThreadsReservedByChildren int

	CPUSetLimit              []int
	CPUSetReservedByChildren []int
}
   330  
   331  func max(a, b int) int {
   332  	if a > b {
   333  		return a
   334  	}
   335  	return b
   336  }
   337  
   338  func maxq(a, b quantity.Size) quantity.Size {
   339  	if a > b {
   340  		return a
   341  	}
   342  	return b
   343  }
   344  
   345  // GetLocalCPUSetQuota returns the current CPU set quota for the group. This
   346  // does not return any inheritted CPU set quota.
   347  func (grp *Group) GetLocalCPUSetQuota() []int {
   348  	if grp.CPULimit == nil || len(grp.CPULimit.CPUSet) == 0 {
   349  		return []int{}
   350  	}
   351  	return grp.CPULimit.CPUSet
   352  }
   353  
   354  // GetCPUSetQuota returns the currently active CPU set quota for this group, which
   355  // includes the case where the CPU set is inherited from a parent group.
   356  func (grp *Group) GetCPUSetQuota() []int {
   357  	localCPUSet := grp.GetLocalCPUSetQuota()
   358  	if len(localCPUSet) != 0 {
   359  		return localCPUSet
   360  	}
   361  
   362  	parent := grp.parentGroup
   363  	for parent != nil {
   364  		if parent.CPULimit != nil && len(parent.CPULimit.CPUSet) != 0 {
   365  			return parent.CPULimit.CPUSet
   366  		}
   367  		parent = parent.parentGroup
   368  	}
   369  	return nil
   370  }
   371  
   372  // GetLocalCPUQuota returns the final calculated count and percentage of the
   373  // current CPU quota for the group. This does not return any inherited CPU quota, but
   374  // it does take any inherited CPU set into account to adjust in the case of a relative
   375  // usage percentage. If the CPU count is set to 0, then it is expected that it returns
   376  // CPULimit.Percentage times the number of all allowed cores. This is either
   377  // the full amount of cores present on the system, or it is the number of cores allowed
   378  // for this group. Otherwise this command should return the actual count and percentage
   379  // set by the group.
   380  func (grp *Group) GetLocalCPUQuota() (int, int) {
   381  	if grp.CPULimit == nil || grp.CPULimit.Percentage == 0 {
   382  		return 0, 0
   383  	}
   384  
   385  	// always use the count if set
   386  	if grp.CPULimit.Count != 0 {
   387  		return grp.CPULimit.Count, grp.CPULimit.Percentage
   388  	} else {
   389  		cpuCount := runtimeNumCPU()
   390  		cpuSetCount := len(grp.GetCPUSetQuota())
   391  		if cpuSetCount != 0 && cpuSetCount < cpuCount {
   392  			cpuCount = cpuSetCount
   393  		}
   394  		return cpuCount, grp.CPULimit.Percentage
   395  	}
   396  }
   397  
   398  func (grp *Group) getCurrentCPUAllocation() int {
   399  	count, percentage := grp.GetLocalCPUQuota()
   400  	return count * percentage
   401  }
   402  
   403  // getQuotaAllocations Recursively retrieve current group quotas statistics, this should just
   404  // be invoked on the upper parent of a group tree, and then it will gather active quotas for the
   405  // tree and store them in the allQuotas paramater
   406  func (grp *Group) getQuotaAllocations(allQuotas map[string]*groupQuotaAllocations) *groupQuotaAllocations {
   407  	limits := &groupQuotaAllocations{
   408  		MemoryLimit:  grp.MemoryLimit,
   409  		CPULimit:     grp.getCurrentCPUAllocation(),
   410  		ThreadsLimit: grp.ThreadLimit,
   411  		CPUSetLimit:  grp.GetLocalCPUSetQuota(),
   412  	}
   413  
   414  	// sliceUniqueAndSort sorts an array of ints in ascending order and removes duplicates
   415  	sliceUniqueAndSort := func(input []int) []int {
   416  		m := map[int]bool{}
   417  		for _, v := range input {
   418  			m[v] = true
   419  		}
   420  		result := []int{}
   421  		for k := range m {
   422  			result = append(result, k)
   423  		}
   424  		sort.Ints(result)
   425  		return result
   426  	}
   427  
   428  	for _, subGroup := range grp.subGroups {
   429  		// cyclic checks are made by visitTree so we make the assumption here
   430  		// that no cyclic dependencies exists.
   431  		subGroupLimits := subGroup.getQuotaAllocations(allQuotas)
   432  
   433  		// As we count up the usage of quotas across our sub-groups we must either use the actual
   434  		// limits of the below sub-group, or the actual usage of the sub-group. The reason we must do this
   435  		// is because if the sub-group doesn't have any limit set for a quota, but the sub-group has sub-groups
   436  		// itself that do have limits, then we must use that value instead. Hence the max* functions.
   437  		limits.MemoryReservedByChildren += maxq(subGroupLimits.MemoryLimit, subGroupLimits.MemoryReservedByChildren)
   438  		limits.CPUReservedByChildren += max(subGroupLimits.CPULimit, subGroupLimits.CPUReservedByChildren)
   439  		limits.ThreadsReservedByChildren += max(subGroupLimits.ThreadsLimit, subGroupLimits.ThreadsReservedByChildren)
   440  
   441  		// We need to merge the allowed CPUs lists, but we need to make sure that the list is unique, since cpu cores
   442  		// can be reused between sub-groups.
   443  		if len(subGroupLimits.CPUSetLimit) > 0 {
   444  			limits.CPUSetReservedByChildren = append(limits.CPUSetReservedByChildren, subGroupLimits.CPUSetLimit...)
   445  		} else if len(subGroupLimits.CPUSetReservedByChildren) > 0 {
   446  			limits.CPUSetReservedByChildren = append(limits.CPUSetReservedByChildren, subGroupLimits.CPUSetReservedByChildren...)
   447  		}
   448  	}
   449  
   450  	// Sort the allowed CPUs list, and remove duplicates.
   451  	if len(limits.CPUSetReservedByChildren) > 0 {
   452  		limits.CPUSetReservedByChildren = sliceUniqueAndSort(limits.CPUSetReservedByChildren)
   453  	}
   454  
   455  	// Store the retrieved limits for the group
   456  	allQuotas[grp.Name] = limits
   457  	return limits
   458  }
   459  
   460  // validateMemoryResourceFit verifies that the new memory limit doesn't conflict with the current reserved memory
   461  // limit of the group, and if not locates the nearest parent group that has a memory quota, and then verifies
   462  // if that group has any space available by checking its 'memoryReserved'. The 'memoryReserved' tells us how much
   463  // of the group quotas limit has been used already by its subgroups (excluding the one querying).
   464  func (grp *Group) validateMemoryResourceFit(allQuotas map[string]*groupQuotaAllocations, memoryLimit quantity.Size) error {
   465  
   466  	// make sure current usage does not exceed the new limit, we can avoid any
   467  	// recursive descent as we already have counted up the usage of our children.
   468  	currentLimits := allQuotas[grp.Name]
   469  	memoryReserved := grp.MemoryLimit
   470  	if currentLimits != nil {
   471  		if currentLimits.MemoryReservedByChildren > memoryLimit {
   472  			return fmt.Errorf("group memory limit of %s is too small to fit current subgroup usage of %s",
   473  				memoryLimit.IECString(), currentLimits.MemoryReservedByChildren.IECString())
   474  		}
   475  
   476  		// if we are reducing the limit, then we don't need to check upper parents,
   477  		// as we can assume it will fit by this point
   478  		if memoryLimit < grp.MemoryLimit {
   479  			return nil
   480  		}
   481  
   482  		memoryReserved = maxq(memoryReserved, currentLimits.MemoryReservedByChildren)
   483  	}
   484  
   485  	// now we check parents up the tree to make sure we also fit with any
   486  	// previous usage limits of our parents.
   487  	parent := grp.parentGroup
   488  	for parent != nil {
   489  		limits := allQuotas[parent.Name]
   490  		if limits != nil && limits.MemoryLimit != 0 {
   491  			// We need to take into account that we might have a matching limit in this group, and thus we account
   492  			// for some of the reserved memory. So subtract that.
   493  			memoryAvailable := limits.MemoryLimit - (limits.MemoryReservedByChildren - memoryReserved)
   494  			if memoryLimit > memoryAvailable {
   495  				return fmt.Errorf("sub-group memory limit of %s is too large to fit inside group %q remaining quota space %s",
   496  					memoryLimit.IECString(), parent.Name, memoryAvailable.IECString())
   497  			}
   498  			break
   499  		}
   500  		parent = parent.parentGroup
   501  	}
   502  	return nil
   503  }
   504  
   505  // validateCPUResourceFit verifies that the new cpu limit doesn't conflict with the current reserved cpu
   506  // limit of the group, and if not locates the nearest parent group that has a cpu quota, and then verifies
   507  // if that group has any space available by checking its 'cpuReserved'. The 'cpuReserved' tells us how much
   508  // of the group quotas limit has been used already by its subgroups (excluding the one querying).
   509  func (grp *Group) validateCPUResourceFit(allQuotas map[string]*groupQuotaAllocations, resourceLimits Resources) error {
   510  
   511  	// handle the zero-count case where we instead need to use the number
   512  	// of cpu cores available to use, which is either the number of cores
   513  	// on the system, or in the provided CPU set, or in a CPU set inheritted.
   514  	cpuRequested := resourceLimits.CPU.Count * resourceLimits.CPU.Percentage
   515  	if resourceLimits.CPU.Count == 0 {
   516  		cpuSetCount := len(grp.GetCPUSetQuota())
   517  		if cpuSetCount == 0 {
   518  			cpuSetCount = runtimeNumCPU()
   519  		}
   520  		cpuRequested = cpuSetCount * resourceLimits.CPU.Percentage
   521  	}
   522  
   523  	// make sure current usage does not exceed the new limit, we can avoid any
   524  	// recursive descent as we already have counted up the usage of our children.
   525  	currentLimits := allQuotas[grp.Name]
   526  
   527  	// currentLimits will be null during creation, so this statement is triggered when
   528  	// we modify limits on an existing group
   529  	var existingCPUAllocation int
   530  	if currentLimits != nil {
   531  		existingCPUAllocation = currentLimits.CPULimit
   532  		if currentLimits.CPUReservedByChildren > cpuRequested {
   533  			return fmt.Errorf("group cpu limit of %d%% is less than current subgroup usage of %d%%",
   534  				cpuRequested, currentLimits.CPUReservedByChildren)
   535  		}
   536  
   537  		// if we are reducing the limit, then we don't need to check upper parents,
   538  		// as we can assume it will fit by this point
   539  		if cpuRequested < existingCPUAllocation {
   540  			return nil
   541  		}
   542  
   543  		existingCPUAllocation = max(existingCPUAllocation, currentLimits.CPUReservedByChildren)
   544  	}
   545  
   546  	// now we check parents up the tree to make sure we also fit with any
   547  	// previous usage limits of our parents.
   548  	parent := grp.parentGroup
   549  	for parent != nil {
   550  		limits := allQuotas[parent.Name]
   551  		if limits != nil {
   552  			if limits.CPULimit != 0 {
   553  				// We need to take into account that we might have a matching limit in this group, and thus we account
   554  				// for some of the reserved amount of cpu time. So subtract that.
   555  				cpuAvailable := limits.CPULimit - (limits.CPUReservedByChildren - existingCPUAllocation)
   556  				if cpuRequested > cpuAvailable {
   557  					return fmt.Errorf("sub-group cpu limit of %d%% is too large to fit inside group %q remaining quota space %d%%",
   558  						cpuRequested, parent.Name, cpuAvailable)
   559  				}
   560  				break
   561  			} else if len(limits.CPUSetLimit) > 0 {
   562  				maxCPUAvailableInSet := len(limits.CPUSetLimit) * 100
   563  				if cpuRequested > maxCPUAvailableInSet {
   564  					return fmt.Errorf("sub-group cpu limit of %d%% is too large to fit inside group %q with allowed CPU set %v",
   565  						cpuRequested, parent.Name, limits.CPUSetLimit)
   566  				}
   567  				break
   568  			}
   569  		}
   570  		parent = parent.parentGroup
   571  	}
   572  	return nil
   573  }
   574  
   575  func contains(s []int, e int) bool {
   576  	for _, a := range s {
   577  		if a == e {
   578  			return true
   579  		}
   580  	}
   581  	return false
   582  }
   583  
   584  // validateCPUsAllowedResourceFit verifies that the new cpu-set doesn't conflict with the current reserved cpu-set
   585  // of the group, and if not locates the nearest parent group that has a cpu-set quota, and then verifies
   586  // that the requested cpu cores match a subset of the previously set allowance.
   587  func (grp *Group) validateCPUsAllowedResourceFit(allQuotas map[string]*groupQuotaAllocations, cpusAllowed []int) error {
   588  
   589  	// isSuperset returns true if a is a superset of b.
   590  	isSuperset := func(a, b []int) bool {
   591  		for _, b1 := range b {
   592  			if !contains(a, b1) {
   593  				return false
   594  			}
   595  		}
   596  		return true
   597  	}
   598  
   599  	// make sure current cpu sets don't conflict, we can avoid any
   600  	// recursive descent as we already have counted up the usage of our children.
   601  	currentLimits := allQuotas[grp.Name]
   602  	if currentLimits != nil {
   603  		if !isSuperset(cpusAllowed, currentLimits.CPUSetReservedByChildren) {
   604  			return fmt.Errorf("group cpu-set %v is not a superset of current subgroup usage of %v",
   605  				cpusAllowed, currentLimits.CPUSetReservedByChildren)
   606  		}
   607  
   608  		// If we are doing further restrictions (i.e the new cpu set is a subset of the current)
   609  		// and we got past the previous check then we don't need to check upper parents,
   610  		// we can assume by this point it will be ok
   611  		if isSuperset(grp.GetLocalCPUSetQuota(), cpusAllowed) {
   612  			return nil
   613  		}
   614  	}
   615  
   616  	// now we check parents up the tree to make sure we also fit with any
   617  	// previous usage limits of our parents.
   618  	parent := grp.parentGroup
   619  	for parent != nil {
   620  		limits := allQuotas[parent.Name]
   621  		if limits != nil && len(limits.CPUSetLimit) != 0 {
   622  			if !isSuperset(limits.CPUSetLimit, cpusAllowed) {
   623  				return fmt.Errorf("sub-group cpu-set %v is not a subset of group %q cpu-set %v",
   624  					cpusAllowed, parent.Name, limits.CPUSetLimit)
   625  			}
   626  			break
   627  		}
   628  		parent = parent.parentGroup
   629  	}
   630  	return nil
   631  }
   632  
   633  // validateThreadResourceFit verifies that the new thread limit doesn't conflict with the current reserved thread
   634  // limit of the group, and if not locates the nearest parent group that has a thread quota, and then verifies
   635  // if that group has any space available by checking its 'threadsReserved'. The 'threadsReserved' tells us how much
   636  // of the group quotas limit has been used already by its subgroups (excluding the one querying).
   637  func (grp *Group) validateThreadResourceFit(allQuotas map[string]*groupQuotaAllocations, threadLimit int) error {
   638  
   639  	// make sure current usage does not exceed the new limit, we can avoid any
   640  	// recursive descent as we already have counted up the usage of our children.
   641  	currentLimits := allQuotas[grp.Name]
   642  	threadsReserved := grp.ThreadLimit
   643  	if currentLimits != nil {
   644  		if currentLimits.ThreadsReservedByChildren > threadLimit {
   645  			return fmt.Errorf("group thread limit of %d is too small to fit current subgroup usage of %d",
   646  				threadLimit, currentLimits.ThreadsReservedByChildren)
   647  		}
   648  
   649  		// if we are reducing the limit, then we don't need to check upper parents,
   650  		// as we can assume it will fit by this point
   651  		if threadLimit < grp.ThreadLimit {
   652  			return nil
   653  		}
   654  
   655  		threadsReserved = max(threadsReserved, currentLimits.ThreadsReservedByChildren)
   656  	}
   657  
   658  	// now we check parents up the tree to make sure we also fit with any
   659  	// previous usage limits of our parents.
   660  	parent := grp.parentGroup
   661  	for parent != nil {
   662  		limits := allQuotas[parent.Name]
   663  		if limits != nil && limits.ThreadsLimit != 0 {
   664  			// We need to take into account that we might have a matching limit in this group, and thus we account
   665  			// for some of the reserved threads. So subtract that.
   666  			threadsAvailable := limits.ThreadsLimit - (limits.ThreadsReservedByChildren - threadsReserved)
   667  			if threadLimit > threadsAvailable {
   668  				return fmt.Errorf("sub-group thread limit of %d is too large to fit inside group %q remaining quota space %d",
   669  					threadLimit, parent.Name, threadsAvailable)
   670  			}
   671  			break
   672  		}
   673  		parent = parent.parentGroup
   674  	}
   675  	return nil
   676  }
   677  
   678  // validateQuotasFit verifies that the given group's current limits fits correctly
   679  // into the group's parent group's limits. This is done in multiple steps, where the first
   680  // one is to get a statistics for the upper-most parent group, to get a combined overview
   681  // of all quotas currently set and their usage. The next step is, for each quota we want to
   682  // set/change, verify that it does not exceed any previously set quota of matching type.
   683  func (grp *Group) validateQuotasFit(resourceLimits Resources) error {
   684  	upperParent := grp
   685  	for upperParent.parentGroup != nil {
   686  		upperParent = upperParent.parentGroup
   687  	}
   688  
   689  	allQuotas := make(map[string]*groupQuotaAllocations)
   690  	upperParent.getQuotaAllocations(allQuotas)
   691  
   692  	// for each limit we want to set, we need to find the closes parent
   693  	// limit that matches it, and then verify against it's usage if we have room
   694  	if resourceLimits.Memory != nil {
   695  		if err := grp.validateMemoryResourceFit(allQuotas, resourceLimits.Memory.Limit); err != nil {
   696  			return err
   697  		}
   698  	}
   699  	if resourceLimits.CPU != nil && resourceLimits.CPU.Percentage != 0 {
   700  		if err := grp.validateCPUResourceFit(allQuotas, resourceLimits); err != nil {
   701  			return err
   702  		}
   703  	}
   704  	if resourceLimits.CPUSet != nil && len(resourceLimits.CPUSet.CPUs) != 0 {
   705  		if err := grp.validateCPUsAllowedResourceFit(allQuotas, resourceLimits.CPUSet.CPUs); err != nil {
   706  			return err
   707  		}
   708  	}
   709  	if resourceLimits.Threads != nil {
   710  		if err := grp.validateThreadResourceFit(allQuotas, resourceLimits.Threads.Limit); err != nil {
   711  			return err
   712  		}
   713  	}
   714  	return nil
   715  }
   716  
   717  // UpdateQuotaLimits updates all the quota limits set for the group to the new limits
   718  // given. The limits will be validated against the group's parent group's limits, to verify
   719  // that they fit. For instance, if the parent group has a memory limit of 1GB, and the new limit
   720  // given here is 2GB, then the new limit will be rejected.
   721  func (grp *Group) UpdateQuotaLimits(resourceLimits Resources) error {
   722  	currentLimits := grp.GetQuotaResources()
   723  	if err := currentLimits.ValidateChange(resourceLimits); err != nil {
   724  		return err
   725  	}
   726  
   727  	if err := grp.validateQuotasFit(resourceLimits); err != nil {
   728  		return err
   729  	}
   730  
   731  	if resourceLimits.Memory != nil {
   732  		grp.MemoryLimit = resourceLimits.Memory.Limit
   733  	}
   734  	if resourceLimits.CPU != nil {
   735  		grp.CPULimit = &GroupQuotaCPU{
   736  			Count:      resourceLimits.CPU.Count,
   737  			Percentage: resourceLimits.CPU.Percentage,
   738  		}
   739  	}
   740  	if resourceLimits.CPUSet != nil {
   741  		if grp.CPULimit == nil {
   742  			grp.CPULimit = &GroupQuotaCPU{}
   743  		}
   744  		grp.CPULimit.CPUSet = resourceLimits.CPUSet.CPUs
   745  	}
   746  	if resourceLimits.Threads != nil {
   747  		grp.ThreadLimit = resourceLimits.Threads.Limit
   748  	}
   749  	if resourceLimits.Journal != nil {
   750  		if grp.JournalLimit == nil {
   751  			grp.JournalLimit = &GroupQuotaJournal{}
   752  		}
   753  		if resourceLimits.Journal.Size != nil {
   754  			grp.JournalLimit.Size = resourceLimits.Journal.Size.Limit
   755  		}
   756  		if resourceLimits.Journal.Rate != nil {
   757  			grp.JournalLimit.RateEnabled = true
   758  			grp.JournalLimit.RateCount = resourceLimits.Journal.Rate.Count
   759  			grp.JournalLimit.RatePeriod = resourceLimits.Journal.Rate.Period
   760  		}
   761  	}
   762  	return nil
   763  }
   764  
   765  func (grp *Group) validate() error {
   766  	if err := naming.ValidateQuotaGroup(grp.Name); err != nil {
   767  		return err
   768  	}
   769  
   770  	// check if the name is reserved for future usage
   771  	switch grp.Name {
   772  	case "root", "system", "snapd", "user":
   773  		return fmt.Errorf("group name %q reserved", grp.Name)
   774  	}
   775  
   776  	// validate the resource limits for the group
   777  	limits := grp.GetQuotaResources()
   778  	if err := limits.Validate(); err != nil {
   779  		return err
   780  	}
   781  
   782  	if grp.ParentGroup != "" && grp.Name == grp.ParentGroup {
   783  		return fmt.Errorf("group has circular parent reference to itself")
   784  	}
   785  
   786  	if len(grp.SubGroups) != 0 {
   787  		for _, subGrp := range grp.SubGroups {
   788  			if subGrp == grp.Name {
   789  				return fmt.Errorf("group has circular sub-group reference to itself")
   790  			}
   791  		}
   792  	}
   793  	return nil
   794  }
   795  
   796  // NewSubGroup creates a new sub group under the current group.
   797  func (grp *Group) NewSubGroup(name string, resourceLimits Resources) (*Group, error) {
   798  	// TODO: implement a maximum sub-group depth
   799  
   800  	subGrp := &Group{
   801  		Name:        name,
   802  		ParentGroup: grp.Name,
   803  		parentGroup: grp,
   804  	}
   805  
   806  	if err := subGrp.UpdateQuotaLimits(resourceLimits); err != nil {
   807  		return nil, err
   808  	}
   809  
   810  	// check early that the sub group name is not the same as that of the
   811  	// parent, this is fine in systemd world, but in snapd we want unique quota
   812  	// groups
   813  	if name == grp.Name {
   814  		return nil, fmt.Errorf("cannot use same name %q for sub group as parent group", name)
   815  	}
   816  
   817  	// With the new quotas we don't support groups that have a mixture of snaps and
   818  	// subgroups, as this will cause issues with nesting. Groups/subgroups may now
   819  	// only consist of either snaps or subgroups.
   820  	if len(grp.Snaps) != 0 {
   821  		return nil, fmt.Errorf("cannot mix sub groups with snaps in the same group")
   822  	}
   823  
   824  	if err := subGrp.validate(); err != nil {
   825  		return nil, err
   826  	}
   827  
   828  	// save the details of this new sub-group in the parent group
   829  	grp.subGroups = append(grp.subGroups, subGrp)
   830  	grp.SubGroups = append(grp.SubGroups, name)
   831  
   832  	return subGrp, nil
   833  }
   834  
   835  // ResolveCrossReferences takes a set of deserialized groups and sets all
   836  // cross references amongst them using the unexported fields which are not
   837  // serialized.
   838  func ResolveCrossReferences(grps map[string]*Group) error {
   839  	// TODO: consider returning a form of multi-error instead?
   840  
   841  	// iterate over all groups, looking for sub-groups which need to be threaded
   842  	// together with their respective parent groups from the set
   843  
   844  	for name, grp := range grps {
   845  		if name != grp.Name {
   846  			return fmt.Errorf("group has name %q, but is referenced as %q", grp.Name, name)
   847  		}
   848  
   849  		// validate the group, assuming it is unresolved
   850  		if err := grp.validate(); err != nil {
   851  			return fmt.Errorf("group %q is invalid: %v", name, err)
   852  		}
   853  
   854  		// first thread the parent link
   855  		if grp.ParentGroup != "" {
   856  			parent, ok := grps[grp.ParentGroup]
   857  			if !ok {
   858  				return fmt.Errorf("missing group %q referenced as the parent of group %q", grp.ParentGroup, grp.Name)
   859  			}
   860  			grp.parentGroup = parent
   861  
   862  			// make sure that the parent group references this group
   863  			found := false
   864  			for _, parentChildName := range parent.SubGroups {
   865  				if parentChildName == grp.Name {
   866  					found = true
   867  					break
   868  				}
   869  			}
   870  			if !found {
   871  				return fmt.Errorf("group %q does not reference necessary child group %q", parent.Name, grp.Name)
   872  			}
   873  		}
   874  
   875  		// now thread any child links from this group to any children
   876  		if len(grp.SubGroups) != 0 {
   877  			// re-build the internal sub group list
   878  			grp.subGroups = make([]*Group, len(grp.SubGroups))
   879  			for i, subName := range grp.SubGroups {
   880  				sub, ok := grps[subName]
   881  				if !ok {
   882  					return fmt.Errorf("missing group %q referenced as the sub-group of group %q", subName, grp.Name)
   883  				}
   884  
   885  				// check that this sub-group references this group as it's
   886  				// parent
   887  				if sub.ParentGroup != grp.Name {
   888  					return fmt.Errorf("group %q does not reference necessary parent group %q", sub.Name, grp.Name)
   889  				}
   890  
   891  				grp.subGroups[i] = sub
   892  			}
   893  		}
   894  	}
   895  
   896  	return nil
   897  }
   898  
   899  // tree recursively returns all of the sub-groups of the group and the group
   900  // itself.
   901  func (grp *Group) visitTree(visited map[*Group]bool) error {
   902  	// TODO: limit the depth of the tree we traverse
   903  
   904  	// be paranoid about cycles here and check that none of the sub-groups here
   905  	// has already been seen before recursing
   906  	for _, sub := range grp.subGroups {
   907  		// check if this sub-group is actually the same group
   908  		if sub == grp {
   909  			return fmt.Errorf("internal error: circular reference found")
   910  		}
   911  
   912  		// check if we have already seen this sub-group
   913  		if visited[sub] {
   914  			return fmt.Errorf("internal error: circular reference found")
   915  		}
   916  
   917  		// add it to the map
   918  		visited[sub] = true
   919  	}
   920  
   921  	for _, sub := range grp.subGroups {
   922  		if err := sub.visitTree(visited); err != nil {
   923  			return err
   924  		}
   925  	}
   926  
   927  	// add this group too to get the full tree flattened
   928  	visited[grp] = true
   929  
   930  	return nil
   931  }
   932  
// QuotaGroupSet is a set of quota groups. It is used for tracking a set of
// necessary quota groups: AddAllNecessaryGroups adds groups (and their
// implicit dependencies) and AllQuotaGroups enumerates all the quota groups
// in the set. The zero value is an empty set that is ready to use.
type QuotaGroupSet struct {
	// grps holds the members of the set, keyed by group pointer. It is
	// allocated on the first call to AddAllNecessaryGroups.
	grps map[*Group]bool
}
   940  
   941  // AddAllNecessaryGroups adds all groups that are required for the specified
   942  // group to be effective to the set. This means all sub-groups of this group,
   943  // all parent groups of this group, and all sub-trees of any parent groups. This
   944  // set is the set of quota groups that must exist for this quota group to be
   945  // fully realized on a system, since all sub-branches of the full tree must
   946  // exist since this group may share some quota resources with the other
   947  // branches. There is no support for manipulating group trees while
   948  // accumulating to a QuotaGroupSet using this.
   949  func (s *QuotaGroupSet) AddAllNecessaryGroups(grp *Group) error {
   950  	if s.grps == nil {
   951  		s.grps = make(map[*Group]bool)
   952  	}
   953  
   954  	// the easy way to find all the quotas necessary for any arbitrary sub-group
   955  	// is to walk up all the way to the root parent group, then get the full
   956  	// tree beneath that and add all groups
   957  	prevParentGrp := grp
   958  	nextParentGrp := grp.parentGroup
   959  	for nextParentGrp != nil {
   960  		prevParentGrp = nextParentGrp
   961  		nextParentGrp = nextParentGrp.parentGroup
   962  	}
   963  
   964  	if s.grps[prevParentGrp] {
   965  		// nothing to do
   966  		return nil
   967  	}
   968  
   969  	// use a different map to prevent any accumulations to the quota group set
   970  	// that happen before a cycle is detected, we only want to add the groups
   971  	treeGroupMap := make(map[*Group]bool)
   972  	if err := prevParentGrp.visitTree(treeGroupMap); err != nil {
   973  		return err
   974  	}
   975  
   976  	// add all the groups in the tree to the quota group set
   977  	for g := range treeGroupMap {
   978  		s.grps[g] = true
   979  	}
   980  
   981  	return nil
   982  }
   983  
   984  // AllQuotaGroups returns a flattend list of all quota groups and necessary
   985  // quota groups that have been added to the set.
   986  func (s *QuotaGroupSet) AllQuotaGroups() []*Group {
   987  	grps := make([]*Group, 0, len(s.grps))
   988  	for grp := range s.grps {
   989  		grps = append(grps, grp)
   990  	}
   991  
   992  	// sort the groups by their name for easier testing
   993  	sort.SliceStable(grps, func(i, j int) bool {
   994  		return grps[i].Name < grps[j].Name
   995  	})
   996  
   997  	return grps
   998  }