// -*- Mode: Go; indent-tabs-mode: t -*-

/*
 * Copyright (C) 2021 Canonical Ltd
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 3 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 */

// Package quota defines state structures for resource quota groups
// for snaps.
package quota

import (
	"bytes"
	"fmt"
	"path/filepath"
	"runtime"
	"sort"
	"time"

	// TODO: move this to snap/quantity? or similar
	"gitee.com/mysnapcore/mysnapd/dirs"
	"gitee.com/mysnapcore/mysnapd/gadget/quantity"
	"gitee.com/mysnapcore/mysnapd/progress"
	"gitee.com/mysnapcore/mysnapd/snap/naming"
	"gitee.com/mysnapcore/mysnapd/systemd"
)

// runtimeNumCPU reports the number of CPU cores of the system. It is kept
// in a variable so that tests can replace it.
var runtimeNumCPU = runtime.NumCPU

// GroupQuotaCPU contains the different knobs that can be tuned
// for cpu quota limits. The allowed CPU percentage to use is split across two limits
// to better support an intuitive way of setting the limits.
type GroupQuotaCPU struct {
	// Count is the multiplier that is used in combination with the
	// percentage parameter to determine the final CPU resource constraint value.
	// The value is a positive integer or 0. A value of 0 will be treated as 1.
	Count int `json:"count,omitempty"`

	// Percentage is a positive integer between 0 and 100. It is used together with
	// the Count parameter to determine the final CPU resource constraint value. The value
	// written to the systemd slice will be Count*Percentage. A value of 0 means that the limit
	// in Percentage and Count is ignored.
	Percentage int `json:"percentage,omitempty"`

	// CPUSet is a list of CPU core indices that are allowed to be used by the group. Each value
	// in the list refers to the CPU core number. If the list is empty, all CPU cores are allowed.
	CPUSet []int `json:"allowed-cpus,omitempty"`
}

// GroupQuotaJournal contains the supported limits for journald. Any limit set here
// applies only to the quota group itself. Journal limits will not be inherited by the
// sub-groups as this behaviour is not supported by systemd.
type GroupQuotaJournal struct {
	// Size is the maximum allowed size of the journal for the group.
	// If the size is set below current usage, systemd will automatically treat
	// the current usage of the journald namespace as the minimum limit and
	// render whatever is set here ineffective. The maximum allowed size for
	// journald namespaces is 4GB. A value of 0 here means no limit is present.
	Size quantity.Size `json:"size,omitempty"`

	// RateEnabled tells us whether or not the values provided in RateCount and
	// RatePeriod should be written.
	RateEnabled bool `json:"rate-enabled,omitempty"`
	// RateCount is the number of messages allowed each RatePeriod. A zero value
	// in this field will disable the rate-limit.
	RateCount int `json:"rate-count,omitempty"`
	// RatePeriod is the time-period for when the rate resets. Each RatePeriod,
	// RateCount number of messages is allowed. A zero value in this field will
	// disable the rate-limit.
	RatePeriod time.Duration `json:"rate-period,omitempty"`
}

// Group is a quota group of snaps, services or sub-groups that are all subject
// to specific resource quotas. The quota resource types that can be set on a
// group are memory, CPU (percentage and allowed cores), threads and journal
// limits.
89 type Group struct { 90 // Name is the name of the quota group. This name is used the 91 // name of the systemd slice underlying the quota group. 92 // Certain names are reserved for future use: system, snapd, root, user. 93 // Otherwise names following the same rules as snap names can be used. 94 Name string `json:"name,omitempty"` 95 96 // SubGroups is the set of sub-groups that are subject to this quota. 97 // Sub-groups have their own limits, subject to the requirement that the 98 // highest quota for a sub-group is that of the parent group. 99 SubGroups []string `json:"sub-groups,omitempty"` 100 101 // subGroups is the set of actual sub-group objects, needed for tracking and 102 // calculations 103 subGroups []*Group 104 105 // MemoryLimit is the limit of memory available to the processes in the 106 // group where if the total used memory of all the processes exceeds the 107 // limit, oom-killer is invoked which will start killing processes. The 108 // specific behavior of which processes are killed is subject to the 109 // ExhaustionBehavior. MemoryLimit is expressed in bytes. 110 MemoryLimit quantity.Size `json:"memory-limit,omitempty"` 111 112 // CPULimit is the quotas for the cpu and consists of a couple of nubs. 113 // It is possible to control the percentage of the cpu available for the group 114 // and which cores (requires cgroupsv2) are allowed to be used. 115 CPULimit *GroupQuotaCPU `json:"cpu-limit,omitempty"` 116 117 // ThreadLimit is the limit of threads/processes that can be active at once in 118 // the group. Once the limit is reached, further forks() or clones() will be blocked 119 // for processes in the group. 120 ThreadLimit int `json:"task-limit,omitempty"` 121 122 // JournalLimit is the limits that apply to the journal for this quota group. When 123 // this limit is present, then the quota group will be assigned a log namespace for 124 // journald. 
125 JournalLimit *GroupQuotaJournal `json:"journal-limit,omitempty"` 126 127 // ParentGroup is the the parent group that this group is a child of. If it 128 // is empty, then this is a "root" quota group. 129 ParentGroup string `json:"parent-group,omitempty"` 130 131 // parentGroup is the actual parent group object, needed for tracking and 132 // calculations 133 parentGroup *Group 134 135 // Snaps is the set of snaps that is part of this quota group. If this is 136 // empty then the underlying slice may not exist on the system. 137 Snaps []string `json:"snaps,omitempty"` 138 } 139 140 // NewGroup creates a new top quota group with the given name and memory limit. 141 func NewGroup(name string, resourceLimits Resources) (*Group, error) { 142 grp := &Group{ 143 Name: name, 144 } 145 146 if err := grp.UpdateQuotaLimits(resourceLimits); err != nil { 147 return nil, err 148 } 149 150 if err := grp.validate(); err != nil { 151 return nil, err 152 } 153 154 return grp, nil 155 } 156 157 func (grp *Group) GetQuotaResources() Resources { 158 resourcesBuilder := NewResourcesBuilder() 159 if grp.MemoryLimit != 0 { 160 resourcesBuilder.WithMemoryLimit(grp.MemoryLimit) 161 } 162 if grp.CPULimit != nil { 163 if grp.CPULimit.Count != 0 { 164 resourcesBuilder.WithCPUCount(grp.CPULimit.Count) 165 } 166 if grp.CPULimit.Percentage != 0 { 167 resourcesBuilder.WithCPUPercentage(grp.CPULimit.Percentage) 168 } 169 if len(grp.CPULimit.CPUSet) != 0 { 170 resourcesBuilder.WithCPUSet(grp.CPULimit.CPUSet) 171 } 172 } 173 if grp.ThreadLimit != 0 { 174 resourcesBuilder.WithThreadLimit(grp.ThreadLimit) 175 } 176 if grp.JournalLimit != nil { 177 resourcesBuilder.WithJournalNamespace() 178 if grp.JournalLimit.Size != 0 { 179 resourcesBuilder.WithJournalSize(grp.JournalLimit.Size) 180 } 181 // We cannot just check for RateCount and RatePeriod and call WithJournalRate() 182 // only if both are non-zero, because not calling WithJournalRate() causes the 183 // system's default rate count and rate 
period to be used; what we really want 184 // here is to be able to completely disable the rate-limit for a journal quota. 185 if grp.JournalLimit.RateEnabled { 186 resourcesBuilder.WithJournalRate(grp.JournalLimit.RateCount, grp.JournalLimit.RatePeriod) 187 } 188 } 189 return resourcesBuilder.Build() 190 } 191 192 // CurrentMemoryUsage returns the current memory usage of the quota group. For 193 // quota groups which do not yet have a backing systemd slice on the system ( 194 // i.e. quota groups without any snaps in them), the memory usage is reported as 195 // 0. 196 func (grp *Group) CurrentMemoryUsage() (quantity.Size, error) { 197 sysd := systemd.New(systemd.SystemMode, progress.Null) 198 199 // check if this group is actually active, it could not physically exist yet 200 // since it has no snaps in it 201 isActive, err := sysd.IsActive(grp.SliceFileName()) 202 if err != nil { 203 return 0, err 204 } 205 if !isActive { 206 return 0, nil 207 } 208 209 mem, err := sysd.CurrentMemoryUsage(grp.SliceFileName()) 210 if err != nil { 211 return 0, err 212 } 213 214 return mem, nil 215 } 216 217 // CurrentTaskUsage returns the current task (processes, threads) usage of the quota group. 218 // For quota groups which do not yet have a backing systemd slice on the system ( 219 // i.e. 
quota groups without any snaps in them), the task usage is reported 220 // as 0 221 func (grp *Group) CurrentTaskUsage() (int, error) { 222 sysd := systemd.New(systemd.SystemMode, progress.Null) 223 224 // check if this group is actually active, it could not physically exist yet 225 // since it has no snaps in it 226 isActive, err := sysd.IsActive(grp.SliceFileName()) 227 if err != nil { 228 return 0, err 229 } 230 if !isActive { 231 return 0, nil 232 } 233 234 count, err := sysd.CurrentTasksCount(grp.SliceFileName()) 235 if err != nil { 236 return 0, err 237 } 238 return int(count), nil 239 } 240 241 // SliceFileName returns the name of the slice file that should be used for this 242 // quota group. This name will include all of the group's parents in the name. 243 // For example, a group named "bar" that is a child of the "foo" group will have 244 // a systemd slice name as "snap.foo-bar.slice". Note that the slice name may 245 // differ from the snapd friendly group name, mainly in the case that the group 246 // is a sub group. 247 func (grp *Group) SliceFileName() string { 248 escapedGrpName := systemd.EscapeUnitNamePath(grp.Name) 249 if grp.ParentGroup == "" { 250 // root group name, then the slice unit is just "<name>.slice" 251 return fmt.Sprintf("snap.%s.slice", escapedGrpName) 252 } 253 254 // otherwise we need to track back to get all of the parent elements 255 grpNames := []string{} 256 parentGrp := grp.parentGroup 257 for parentGrp != nil { 258 grpNames = append([]string{parentGrp.Name}, grpNames...) 
259 parentGrp = parentGrp.parentGroup 260 } 261 262 buf := &bytes.Buffer{} 263 fmt.Fprintf(buf, "snap.") 264 for _, parentGrpName := range grpNames { 265 fmt.Fprintf(buf, "%s-", systemd.EscapeUnitNamePath(parentGrpName)) 266 } 267 fmt.Fprintf(buf, "%s.slice", escapedGrpName) 268 return buf.String() 269 } 270 271 // JournalNamespaceName returns the snap formatted name of the log namespace 272 func (grp *Group) JournalNamespaceName() string { 273 return fmt.Sprintf("snap-%s", grp.Name) 274 } 275 276 // JournalConfFileName returns the name of the journal configuration file that should 277 // be used for this quota group. As an example, a group named "foo" will return a name 278 // of journald@snap-foo.conf 279 func (grp *Group) JournalConfFileName() string { 280 return fmt.Sprintf("journald@%s.conf", grp.JournalNamespaceName()) 281 } 282 283 // JournalServiceName returns the systemd service name for the quota group. 284 func (grp *Group) JournalServiceName() string { 285 return fmt.Sprintf("systemd-journald@%s.service", grp.JournalNamespaceName()) 286 } 287 288 // JournalServiceFile returns the directory specific to this quota group for 289 // its journal service unit drop-in. 290 func (grp *Group) JournalServiceDropInDir() string { 291 return filepath.Join(dirs.SnapServicesDir, grp.JournalServiceName()+".d") 292 } 293 294 // JournalServiceDropInFile returns the full path to the journal service unit drop-in 295 // file for the quota group. 296 func (grp *Group) JournalServiceDropInFile() string { 297 return filepath.Join(grp.JournalServiceDropInDir(), "00-snap.conf") 298 } 299 300 // groupQuotaAllocations contains information about current quotas of a group 301 // and is used by getQuotaAllocations to contain this information. This only accounts 302 // for quotas that support inheritance, which currently does not include journal quotas. 
303 // There are two types of values for each quota - the quota limit set by this group, 304 // and the quota reserved by children of this group. Examples: 305 // Group that has a non-memory quota, but has a child group that has a memory quota of 512mb: 306 // memoryLimit = 0 307 // memoryReserved = 512 mb 308 // Group that has a memory quota of 512mb, but has only children groups with non-memory quota: 309 // memoryLimit = 512 mb 310 // memoryReserved = 0 311 // Group that has a memory quota of 512mb, and has a child group that has a memory quota of 256mb: 312 // memoryLimit = 512 mb 313 // memoryReserved = 256 mb 314 // If the limit value is non-zero, then the reserved value can never be greater than the limit, however 315 // if the limit is zero, then the reserved value must be below the nearest non-zero limit as you traverse 316 // up the tree. 317 type groupQuotaAllocations struct { 318 MemoryLimit quantity.Size 319 MemoryReservedByChildren quantity.Size 320 321 CPULimit int 322 CPUReservedByChildren int 323 324 ThreadsLimit int 325 ThreadsReservedByChildren int 326 327 CPUSetLimit []int 328 CPUSetReservedByChildren []int 329 } 330 331 func max(a, b int) int { 332 if a > b { 333 return a 334 } 335 return b 336 } 337 338 func maxq(a, b quantity.Size) quantity.Size { 339 if a > b { 340 return a 341 } 342 return b 343 } 344 345 // GetLocalCPUSetQuota returns the current CPU set quota for the group. This 346 // does not return any inheritted CPU set quota. 347 func (grp *Group) GetLocalCPUSetQuota() []int { 348 if grp.CPULimit == nil || len(grp.CPULimit.CPUSet) == 0 { 349 return []int{} 350 } 351 return grp.CPULimit.CPUSet 352 } 353 354 // GetCPUSetQuota returns the currently active CPU set quota for this group, which 355 // includes the case where the CPU set is inherited from a parent group. 
356 func (grp *Group) GetCPUSetQuota() []int { 357 localCPUSet := grp.GetLocalCPUSetQuota() 358 if len(localCPUSet) != 0 { 359 return localCPUSet 360 } 361 362 parent := grp.parentGroup 363 for parent != nil { 364 if parent.CPULimit != nil && len(parent.CPULimit.CPUSet) != 0 { 365 return parent.CPULimit.CPUSet 366 } 367 parent = parent.parentGroup 368 } 369 return nil 370 } 371 372 // GetLocalCPUQuota returns the final calculated count and percentage of the 373 // current CPU quota for the group. This does not return any inherited CPU quota, but 374 // it does take any inherited CPU set into account to adjust in the case of a relative 375 // usage percentage. If the CPU count is set to 0, then it is expected that it returns 376 // CPULimit.Percentage times the number of all allowed cores. This is either 377 // the full amount of cores present on the system, or it is the number of cores allowed 378 // for this group. Otherwise this command should return the actual count and percentage 379 // set by the group. 
380 func (grp *Group) GetLocalCPUQuota() (int, int) { 381 if grp.CPULimit == nil || grp.CPULimit.Percentage == 0 { 382 return 0, 0 383 } 384 385 // always use the count if set 386 if grp.CPULimit.Count != 0 { 387 return grp.CPULimit.Count, grp.CPULimit.Percentage 388 } else { 389 cpuCount := runtimeNumCPU() 390 cpuSetCount := len(grp.GetCPUSetQuota()) 391 if cpuSetCount != 0 && cpuSetCount < cpuCount { 392 cpuCount = cpuSetCount 393 } 394 return cpuCount, grp.CPULimit.Percentage 395 } 396 } 397 398 func (grp *Group) getCurrentCPUAllocation() int { 399 count, percentage := grp.GetLocalCPUQuota() 400 return count * percentage 401 } 402 403 // getQuotaAllocations Recursively retrieve current group quotas statistics, this should just 404 // be invoked on the upper parent of a group tree, and then it will gather active quotas for the 405 // tree and store them in the allQuotas paramater 406 func (grp *Group) getQuotaAllocations(allQuotas map[string]*groupQuotaAllocations) *groupQuotaAllocations { 407 limits := &groupQuotaAllocations{ 408 MemoryLimit: grp.MemoryLimit, 409 CPULimit: grp.getCurrentCPUAllocation(), 410 ThreadsLimit: grp.ThreadLimit, 411 CPUSetLimit: grp.GetLocalCPUSetQuota(), 412 } 413 414 // sliceUniqueAndSort sorts an array of ints in ascending order and removes duplicates 415 sliceUniqueAndSort := func(input []int) []int { 416 m := map[int]bool{} 417 for _, v := range input { 418 m[v] = true 419 } 420 result := []int{} 421 for k := range m { 422 result = append(result, k) 423 } 424 sort.Ints(result) 425 return result 426 } 427 428 for _, subGroup := range grp.subGroups { 429 // cyclic checks are made by visitTree so we make the assumption here 430 // that no cyclic dependencies exists. 431 subGroupLimits := subGroup.getQuotaAllocations(allQuotas) 432 433 // As we count up the usage of quotas across our sub-groups we must either use the actual 434 // limits of the below sub-group, or the actual usage of the sub-group. 
The reason we must do this 435 // is because if the sub-group doesn't have any limit set for a quota, but the sub-group has sub-groups 436 // itself that do have limits, then we must use that value instead. Hence the max* functions. 437 limits.MemoryReservedByChildren += maxq(subGroupLimits.MemoryLimit, subGroupLimits.MemoryReservedByChildren) 438 limits.CPUReservedByChildren += max(subGroupLimits.CPULimit, subGroupLimits.CPUReservedByChildren) 439 limits.ThreadsReservedByChildren += max(subGroupLimits.ThreadsLimit, subGroupLimits.ThreadsReservedByChildren) 440 441 // We need to merge the allowed CPUs lists, but we need to make sure that the list is unique, since cpu cores 442 // can be reused between sub-groups. 443 if len(subGroupLimits.CPUSetLimit) > 0 { 444 limits.CPUSetReservedByChildren = append(limits.CPUSetReservedByChildren, subGroupLimits.CPUSetLimit...) 445 } else if len(subGroupLimits.CPUSetReservedByChildren) > 0 { 446 limits.CPUSetReservedByChildren = append(limits.CPUSetReservedByChildren, subGroupLimits.CPUSetReservedByChildren...) 447 } 448 } 449 450 // Sort the allowed CPUs list, and remove duplicates. 451 if len(limits.CPUSetReservedByChildren) > 0 { 452 limits.CPUSetReservedByChildren = sliceUniqueAndSort(limits.CPUSetReservedByChildren) 453 } 454 455 // Store the retrieved limits for the group 456 allQuotas[grp.Name] = limits 457 return limits 458 } 459 460 // validateMemoryResourceFit verifies that the new memory limit doesn't conflict with the current reserved memory 461 // limit of the group, and if not locates the nearest parent group that has a memory quota, and then verifies 462 // if that group has any space available by checking its 'memoryReserved'. The 'memoryReserved' tells us how much 463 // of the group quotas limit has been used already by its subgroups (excluding the one querying). 
464 func (grp *Group) validateMemoryResourceFit(allQuotas map[string]*groupQuotaAllocations, memoryLimit quantity.Size) error { 465 466 // make sure current usage does not exceed the new limit, we can avoid any 467 // recursive descent as we already have counted up the usage of our children. 468 currentLimits := allQuotas[grp.Name] 469 memoryReserved := grp.MemoryLimit 470 if currentLimits != nil { 471 if currentLimits.MemoryReservedByChildren > memoryLimit { 472 return fmt.Errorf("group memory limit of %s is too small to fit current subgroup usage of %s", 473 memoryLimit.IECString(), currentLimits.MemoryReservedByChildren.IECString()) 474 } 475 476 // if we are reducing the limit, then we don't need to check upper parents, 477 // as we can assume it will fit by this point 478 if memoryLimit < grp.MemoryLimit { 479 return nil 480 } 481 482 memoryReserved = maxq(memoryReserved, currentLimits.MemoryReservedByChildren) 483 } 484 485 // now we check parents up the tree to make sure we also fit with any 486 // previous usage limits of our parents. 487 parent := grp.parentGroup 488 for parent != nil { 489 limits := allQuotas[parent.Name] 490 if limits != nil && limits.MemoryLimit != 0 { 491 // We need to take into account that we might have a matching limit in this group, and thus we account 492 // for some of the reserved memory. So subtract that. 
493 memoryAvailable := limits.MemoryLimit - (limits.MemoryReservedByChildren - memoryReserved) 494 if memoryLimit > memoryAvailable { 495 return fmt.Errorf("sub-group memory limit of %s is too large to fit inside group %q remaining quota space %s", 496 memoryLimit.IECString(), parent.Name, memoryAvailable.IECString()) 497 } 498 break 499 } 500 parent = parent.parentGroup 501 } 502 return nil 503 } 504 505 // validateCPUResourceFit verifies that the new cpu limit doesn't conflict with the current reserved cpu 506 // limit of the group, and if not locates the nearest parent group that has a cpu quota, and then verifies 507 // if that group has any space available by checking its 'cpuReserved'. The 'cpuReserved' tells us how much 508 // of the group quotas limit has been used already by its subgroups (excluding the one querying). 509 func (grp *Group) validateCPUResourceFit(allQuotas map[string]*groupQuotaAllocations, resourceLimits Resources) error { 510 511 // handle the zero-count case where we instead need to use the number 512 // of cpu cores available to use, which is either the number of cores 513 // on the system, or in the provided CPU set, or in a CPU set inheritted. 514 cpuRequested := resourceLimits.CPU.Count * resourceLimits.CPU.Percentage 515 if resourceLimits.CPU.Count == 0 { 516 cpuSetCount := len(grp.GetCPUSetQuota()) 517 if cpuSetCount == 0 { 518 cpuSetCount = runtimeNumCPU() 519 } 520 cpuRequested = cpuSetCount * resourceLimits.CPU.Percentage 521 } 522 523 // make sure current usage does not exceed the new limit, we can avoid any 524 // recursive descent as we already have counted up the usage of our children. 
525 currentLimits := allQuotas[grp.Name] 526 527 // currentLimits will be null during creation, so this statement is triggered when 528 // we modify limits on an existing group 529 var existingCPUAllocation int 530 if currentLimits != nil { 531 existingCPUAllocation = currentLimits.CPULimit 532 if currentLimits.CPUReservedByChildren > cpuRequested { 533 return fmt.Errorf("group cpu limit of %d%% is less than current subgroup usage of %d%%", 534 cpuRequested, currentLimits.CPUReservedByChildren) 535 } 536 537 // if we are reducing the limit, then we don't need to check upper parents, 538 // as we can assume it will fit by this point 539 if cpuRequested < existingCPUAllocation { 540 return nil 541 } 542 543 existingCPUAllocation = max(existingCPUAllocation, currentLimits.CPUReservedByChildren) 544 } 545 546 // now we check parents up the tree to make sure we also fit with any 547 // previous usage limits of our parents. 548 parent := grp.parentGroup 549 for parent != nil { 550 limits := allQuotas[parent.Name] 551 if limits != nil { 552 if limits.CPULimit != 0 { 553 // We need to take into account that we might have a matching limit in this group, and thus we account 554 // for some of the reserved amount of cpu time. So subtract that. 
555 cpuAvailable := limits.CPULimit - (limits.CPUReservedByChildren - existingCPUAllocation) 556 if cpuRequested > cpuAvailable { 557 return fmt.Errorf("sub-group cpu limit of %d%% is too large to fit inside group %q remaining quota space %d%%", 558 cpuRequested, parent.Name, cpuAvailable) 559 } 560 break 561 } else if len(limits.CPUSetLimit) > 0 { 562 maxCPUAvailableInSet := len(limits.CPUSetLimit) * 100 563 if cpuRequested > maxCPUAvailableInSet { 564 return fmt.Errorf("sub-group cpu limit of %d%% is too large to fit inside group %q with allowed CPU set %v", 565 cpuRequested, parent.Name, limits.CPUSetLimit) 566 } 567 break 568 } 569 } 570 parent = parent.parentGroup 571 } 572 return nil 573 } 574 575 func contains(s []int, e int) bool { 576 for _, a := range s { 577 if a == e { 578 return true 579 } 580 } 581 return false 582 } 583 584 // validateCPUsAllowedResourceFit verifies that the new cpu-set doesn't conflict with the current reserved cpu-set 585 // of the group, and if not locates the nearest parent group that has a cpu-set quota, and then verifies 586 // that the requested cpu cores match a subset of the previously set allowance. 587 func (grp *Group) validateCPUsAllowedResourceFit(allQuotas map[string]*groupQuotaAllocations, cpusAllowed []int) error { 588 589 // isSuperset returns true if a is a superset of b. 590 isSuperset := func(a, b []int) bool { 591 for _, b1 := range b { 592 if !contains(a, b1) { 593 return false 594 } 595 } 596 return true 597 } 598 599 // make sure current cpu sets don't conflict, we can avoid any 600 // recursive descent as we already have counted up the usage of our children. 
601 currentLimits := allQuotas[grp.Name] 602 if currentLimits != nil { 603 if !isSuperset(cpusAllowed, currentLimits.CPUSetReservedByChildren) { 604 return fmt.Errorf("group cpu-set %v is not a superset of current subgroup usage of %v", 605 cpusAllowed, currentLimits.CPUSetReservedByChildren) 606 } 607 608 // If we are doing further restrictions (i.e the new cpu set is a subset of the current) 609 // and we got past the previous check then we don't need to check upper parents, 610 // we can assume by this point it will be ok 611 if isSuperset(grp.GetLocalCPUSetQuota(), cpusAllowed) { 612 return nil 613 } 614 } 615 616 // now we check parents up the tree to make sure we also fit with any 617 // previous usage limits of our parents. 618 parent := grp.parentGroup 619 for parent != nil { 620 limits := allQuotas[parent.Name] 621 if limits != nil && len(limits.CPUSetLimit) != 0 { 622 if !isSuperset(limits.CPUSetLimit, cpusAllowed) { 623 return fmt.Errorf("sub-group cpu-set %v is not a subset of group %q cpu-set %v", 624 cpusAllowed, parent.Name, limits.CPUSetLimit) 625 } 626 break 627 } 628 parent = parent.parentGroup 629 } 630 return nil 631 } 632 633 // validateThreadResourceFit verifies that the new thread limit doesn't conflict with the current reserved thread 634 // limit of the group, and if not locates the nearest parent group that has a thread quota, and then verifies 635 // if that group has any space available by checking its 'threadsReserved'. The 'threadsReserved' tells us how much 636 // of the group quotas limit has been used already by its subgroups (excluding the one querying). 637 func (grp *Group) validateThreadResourceFit(allQuotas map[string]*groupQuotaAllocations, threadLimit int) error { 638 639 // make sure current usage does not exceed the new limit, we can avoid any 640 // recursive descent as we already have counted up the usage of our children. 
641 currentLimits := allQuotas[grp.Name] 642 threadsReserved := grp.ThreadLimit 643 if currentLimits != nil { 644 if currentLimits.ThreadsReservedByChildren > threadLimit { 645 return fmt.Errorf("group thread limit of %d is too small to fit current subgroup usage of %d", 646 threadLimit, currentLimits.ThreadsReservedByChildren) 647 } 648 649 // if we are reducing the limit, then we don't need to check upper parents, 650 // as we can assume it will fit by this point 651 if threadLimit < grp.ThreadLimit { 652 return nil 653 } 654 655 threadsReserved = max(threadsReserved, currentLimits.ThreadsReservedByChildren) 656 } 657 658 // now we check parents up the tree to make sure we also fit with any 659 // previous usage limits of our parents. 660 parent := grp.parentGroup 661 for parent != nil { 662 limits := allQuotas[parent.Name] 663 if limits != nil && limits.ThreadsLimit != 0 { 664 // We need to take into account that we might have a matching limit in this group, and thus we account 665 // for some of the reserved threads. So subtract that. 666 threadsAvailable := limits.ThreadsLimit - (limits.ThreadsReservedByChildren - threadsReserved) 667 if threadLimit > threadsAvailable { 668 return fmt.Errorf("sub-group thread limit of %d is too large to fit inside group %q remaining quota space %d", 669 threadLimit, parent.Name, threadsAvailable) 670 } 671 break 672 } 673 parent = parent.parentGroup 674 } 675 return nil 676 } 677 678 // validateQuotasFit verifies that the given group's current limits fits correctly 679 // into the group's parent group's limits. This is done in multiple steps, where the first 680 // one is to get a statistics for the upper-most parent group, to get a combined overview 681 // of all quotas currently set and their usage. The next step is, for each quota we want to 682 // set/change, verify that it does not exceed any previously set quota of matching type. 
683 func (grp *Group) validateQuotasFit(resourceLimits Resources) error { 684 upperParent := grp 685 for upperParent.parentGroup != nil { 686 upperParent = upperParent.parentGroup 687 } 688 689 allQuotas := make(map[string]*groupQuotaAllocations) 690 upperParent.getQuotaAllocations(allQuotas) 691 692 // for each limit we want to set, we need to find the closes parent 693 // limit that matches it, and then verify against it's usage if we have room 694 if resourceLimits.Memory != nil { 695 if err := grp.validateMemoryResourceFit(allQuotas, resourceLimits.Memory.Limit); err != nil { 696 return err 697 } 698 } 699 if resourceLimits.CPU != nil && resourceLimits.CPU.Percentage != 0 { 700 if err := grp.validateCPUResourceFit(allQuotas, resourceLimits); err != nil { 701 return err 702 } 703 } 704 if resourceLimits.CPUSet != nil && len(resourceLimits.CPUSet.CPUs) != 0 { 705 if err := grp.validateCPUsAllowedResourceFit(allQuotas, resourceLimits.CPUSet.CPUs); err != nil { 706 return err 707 } 708 } 709 if resourceLimits.Threads != nil { 710 if err := grp.validateThreadResourceFit(allQuotas, resourceLimits.Threads.Limit); err != nil { 711 return err 712 } 713 } 714 return nil 715 } 716 717 // UpdateQuotaLimits updates all the quota limits set for the group to the new limits 718 // given. The limits will be validated against the group's parent group's limits, to verify 719 // that they fit. For instance, if the parent group has a memory limit of 1GB, and the new limit 720 // given here is 2GB, then the new limit will be rejected. 
721 func (grp *Group) UpdateQuotaLimits(resourceLimits Resources) error { 722 currentLimits := grp.GetQuotaResources() 723 if err := currentLimits.ValidateChange(resourceLimits); err != nil { 724 return err 725 } 726 727 if err := grp.validateQuotasFit(resourceLimits); err != nil { 728 return err 729 } 730 731 if resourceLimits.Memory != nil { 732 grp.MemoryLimit = resourceLimits.Memory.Limit 733 } 734 if resourceLimits.CPU != nil { 735 grp.CPULimit = &GroupQuotaCPU{ 736 Count: resourceLimits.CPU.Count, 737 Percentage: resourceLimits.CPU.Percentage, 738 } 739 } 740 if resourceLimits.CPUSet != nil { 741 if grp.CPULimit == nil { 742 grp.CPULimit = &GroupQuotaCPU{} 743 } 744 grp.CPULimit.CPUSet = resourceLimits.CPUSet.CPUs 745 } 746 if resourceLimits.Threads != nil { 747 grp.ThreadLimit = resourceLimits.Threads.Limit 748 } 749 if resourceLimits.Journal != nil { 750 if grp.JournalLimit == nil { 751 grp.JournalLimit = &GroupQuotaJournal{} 752 } 753 if resourceLimits.Journal.Size != nil { 754 grp.JournalLimit.Size = resourceLimits.Journal.Size.Limit 755 } 756 if resourceLimits.Journal.Rate != nil { 757 grp.JournalLimit.RateEnabled = true 758 grp.JournalLimit.RateCount = resourceLimits.Journal.Rate.Count 759 grp.JournalLimit.RatePeriod = resourceLimits.Journal.Rate.Period 760 } 761 } 762 return nil 763 } 764 765 func (grp *Group) validate() error { 766 if err := naming.ValidateQuotaGroup(grp.Name); err != nil { 767 return err 768 } 769 770 // check if the name is reserved for future usage 771 switch grp.Name { 772 case "root", "system", "snapd", "user": 773 return fmt.Errorf("group name %q reserved", grp.Name) 774 } 775 776 // validate the resource limits for the group 777 limits := grp.GetQuotaResources() 778 if err := limits.Validate(); err != nil { 779 return err 780 } 781 782 if grp.ParentGroup != "" && grp.Name == grp.ParentGroup { 783 return fmt.Errorf("group has circular parent reference to itself") 784 } 785 786 if len(grp.SubGroups) != 0 { 787 for _, subGrp := 
range grp.SubGroups { 788 if subGrp == grp.Name { 789 return fmt.Errorf("group has circular sub-group reference to itself") 790 } 791 } 792 } 793 return nil 794 } 795 796 // NewSubGroup creates a new sub group under the current group. 797 func (grp *Group) NewSubGroup(name string, resourceLimits Resources) (*Group, error) { 798 // TODO: implement a maximum sub-group depth 799 800 subGrp := &Group{ 801 Name: name, 802 ParentGroup: grp.Name, 803 parentGroup: grp, 804 } 805 806 if err := subGrp.UpdateQuotaLimits(resourceLimits); err != nil { 807 return nil, err 808 } 809 810 // check early that the sub group name is not the same as that of the 811 // parent, this is fine in systemd world, but in snapd we want unique quota 812 // groups 813 if name == grp.Name { 814 return nil, fmt.Errorf("cannot use same name %q for sub group as parent group", name) 815 } 816 817 // With the new quotas we don't support groups that have a mixture of snaps and 818 // subgroups, as this will cause issues with nesting. Groups/subgroups may now 819 // only consist of either snaps or subgroups. 820 if len(grp.Snaps) != 0 { 821 return nil, fmt.Errorf("cannot mix sub groups with snaps in the same group") 822 } 823 824 if err := subGrp.validate(); err != nil { 825 return nil, err 826 } 827 828 // save the details of this new sub-group in the parent group 829 grp.subGroups = append(grp.subGroups, subGrp) 830 grp.SubGroups = append(grp.SubGroups, name) 831 832 return subGrp, nil 833 } 834 835 // ResolveCrossReferences takes a set of deserialized groups and sets all 836 // cross references amongst them using the unexported fields which are not 837 // serialized. 838 func ResolveCrossReferences(grps map[string]*Group) error { 839 // TODO: consider returning a form of multi-error instead? 
840 841 // iterate over all groups, looking for sub-groups which need to be threaded 842 // together with their respective parent groups from the set 843 844 for name, grp := range grps { 845 if name != grp.Name { 846 return fmt.Errorf("group has name %q, but is referenced as %q", grp.Name, name) 847 } 848 849 // validate the group, assuming it is unresolved 850 if err := grp.validate(); err != nil { 851 return fmt.Errorf("group %q is invalid: %v", name, err) 852 } 853 854 // first thread the parent link 855 if grp.ParentGroup != "" { 856 parent, ok := grps[grp.ParentGroup] 857 if !ok { 858 return fmt.Errorf("missing group %q referenced as the parent of group %q", grp.ParentGroup, grp.Name) 859 } 860 grp.parentGroup = parent 861 862 // make sure that the parent group references this group 863 found := false 864 for _, parentChildName := range parent.SubGroups { 865 if parentChildName == grp.Name { 866 found = true 867 break 868 } 869 } 870 if !found { 871 return fmt.Errorf("group %q does not reference necessary child group %q", parent.Name, grp.Name) 872 } 873 } 874 875 // now thread any child links from this group to any children 876 if len(grp.SubGroups) != 0 { 877 // re-build the internal sub group list 878 grp.subGroups = make([]*Group, len(grp.SubGroups)) 879 for i, subName := range grp.SubGroups { 880 sub, ok := grps[subName] 881 if !ok { 882 return fmt.Errorf("missing group %q referenced as the sub-group of group %q", subName, grp.Name) 883 } 884 885 // check that this sub-group references this group as it's 886 // parent 887 if sub.ParentGroup != grp.Name { 888 return fmt.Errorf("group %q does not reference necessary parent group %q", sub.Name, grp.Name) 889 } 890 891 grp.subGroups[i] = sub 892 } 893 } 894 } 895 896 return nil 897 } 898 899 // tree recursively returns all of the sub-groups of the group and the group 900 // itself. 
901 func (grp *Group) visitTree(visited map[*Group]bool) error { 902 // TODO: limit the depth of the tree we traverse 903 904 // be paranoid about cycles here and check that none of the sub-groups here 905 // has already been seen before recursing 906 for _, sub := range grp.subGroups { 907 // check if this sub-group is actually the same group 908 if sub == grp { 909 return fmt.Errorf("internal error: circular reference found") 910 } 911 912 // check if we have already seen this sub-group 913 if visited[sub] { 914 return fmt.Errorf("internal error: circular reference found") 915 } 916 917 // add it to the map 918 visited[sub] = true 919 } 920 921 for _, sub := range grp.subGroups { 922 if err := sub.visitTree(visited); err != nil { 923 return err 924 } 925 } 926 927 // add this group too to get the full tree flattened 928 visited[grp] = true 929 930 return nil 931 } 932 933 // QuotaGroupSet is a set of quota groups, it is used for tracking a set of 934 // necessary quota groups using AddAllNecessaryGroups to add groups (and their 935 // implicit dependencies), and AllQuotaGroups to enumerate all the quota groups 936 // in the set. 937 type QuotaGroupSet struct { 938 grps map[*Group]bool 939 } 940 941 // AddAllNecessaryGroups adds all groups that are required for the specified 942 // group to be effective to the set. This means all sub-groups of this group, 943 // all parent groups of this group, and all sub-trees of any parent groups. This 944 // set is the set of quota groups that must exist for this quota group to be 945 // fully realized on a system, since all sub-branches of the full tree must 946 // exist since this group may share some quota resources with the other 947 // branches. There is no support for manipulating group trees while 948 // accumulating to a QuotaGroupSet using this. 
949 func (s *QuotaGroupSet) AddAllNecessaryGroups(grp *Group) error { 950 if s.grps == nil { 951 s.grps = make(map[*Group]bool) 952 } 953 954 // the easy way to find all the quotas necessary for any arbitrary sub-group 955 // is to walk up all the way to the root parent group, then get the full 956 // tree beneath that and add all groups 957 prevParentGrp := grp 958 nextParentGrp := grp.parentGroup 959 for nextParentGrp != nil { 960 prevParentGrp = nextParentGrp 961 nextParentGrp = nextParentGrp.parentGroup 962 } 963 964 if s.grps[prevParentGrp] { 965 // nothing to do 966 return nil 967 } 968 969 // use a different map to prevent any accumulations to the quota group set 970 // that happen before a cycle is detected, we only want to add the groups 971 treeGroupMap := make(map[*Group]bool) 972 if err := prevParentGrp.visitTree(treeGroupMap); err != nil { 973 return err 974 } 975 976 // add all the groups in the tree to the quota group set 977 for g := range treeGroupMap { 978 s.grps[g] = true 979 } 980 981 return nil 982 } 983 984 // AllQuotaGroups returns a flattend list of all quota groups and necessary 985 // quota groups that have been added to the set. 986 func (s *QuotaGroupSet) AllQuotaGroups() []*Group { 987 grps := make([]*Group, 0, len(s.grps)) 988 for grp := range s.grps { 989 grps = append(grps, grp) 990 } 991 992 // sort the groups by their name for easier testing 993 sort.SliceStable(grps, func(i, j int) bool { 994 return grps[i].Name < grps[j].Name 995 }) 996 997 return grps 998 }