github.com/freetocompute/snapd@v0.0.0-20210618182524-2fb355d72fd9/overlord/servicestate/quota_handlers.go (about) 1 // -*- Mode: Go; indent-tabs-mode: t -*- 2 3 /* 4 * Copyright (C) 2021 Canonical Ltd 5 * 6 * This program is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 3 as 8 * published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 * 18 */ 19 20 package servicestate 21 22 import ( 23 "fmt" 24 "sort" 25 "time" 26 27 tomb "gopkg.in/tomb.v2" 28 29 "github.com/snapcore/snapd/gadget/quantity" 30 "github.com/snapcore/snapd/logger" 31 "github.com/snapcore/snapd/overlord/snapstate" 32 "github.com/snapcore/snapd/overlord/state" 33 "github.com/snapcore/snapd/progress" 34 "github.com/snapcore/snapd/snap" 35 "github.com/snapcore/snapd/snap/quota" 36 "github.com/snapcore/snapd/snapdenv" 37 "github.com/snapcore/snapd/strutil" 38 "github.com/snapcore/snapd/systemd" 39 "github.com/snapcore/snapd/timings" 40 "github.com/snapcore/snapd/wrappers" 41 ) 42 43 // QuotaControlAction is the serialized representation of a quota group 44 // modification that lives in a task. 45 type QuotaControlAction struct { 46 // QuotaName is the name of the quota group being controlled. 47 QuotaName string `json:"quota-name"` 48 49 // Action is the action being taken on the quota group. It can be either 50 // "create", "update", or "remove". 51 Action string `json:"action"` 52 53 // AddSnaps is the set of snaps to add to the quota group, valid for either 54 // the "update" or the "create" actions. 55 AddSnaps []string `json:"snaps"` 56 57 // MemoryLimit is the memory limit for the quota group being controlled, 58 // either the initial limit the group is created with for the "create" 59 // action, or if non-zero for the "update" the memory limit, then the new 60 // value to be set. 61 MemoryLimit quantity.Size 62 63 // ParentName is the name of the parent for the quota group if it is being 64 // created. Eventually this could be used with the "update" action to 65 // support moving quota groups from one parent to another, but that is 66 // currently not supported. 67 ParentName string 68 } 69 70 func (m *ServiceManager) doQuotaControl(t *state.Task, _ *tomb.Tomb) error { 71 st := t.State() 72 st.Lock() 73 defer st.Unlock() 74 75 perfTimings := state.TimingsForTask(t) 76 defer perfTimings.Save(st) 77 78 meter := snapstate.NewTaskProgressAdapterUnlocked(t) 79 80 qcs := []QuotaControlAction{} 81 err := t.Get("quota-control-actions", &qcs) 82 if err != nil { 83 return fmt.Errorf("internal error: cannot get quota-control-action: %v", err) 84 } 85 86 // TODO: support more than one action 87 switch { 88 case len(qcs) > 1: 89 return fmt.Errorf("multiple quota group actions not supported yet") 90 case len(qcs) == 0: 91 return fmt.Errorf("internal error: no quota group actions for quota-control task") 92 } 93 94 qc := qcs[0] 95 96 allGrps, err := AllQuotas(st) 97 if err != nil { 98 return err 99 } 100 101 switch qc.Action { 102 case "create": 103 err = quotaCreate(st, t, qc, allGrps, meter, perfTimings) 104 case "remove": 105 err = quotaRemove(st, t, qc, allGrps, meter, perfTimings) 106 case "update": 107 err = quotaUpdate(st, t, qc, allGrps, meter, perfTimings) 108 default: 109 err = fmt.Errorf("unknown action %q requested", qc.Action) 110 } 111 112 return err 113 } 114 115 func quotaCreate(st *state.State, t *state.Task, action QuotaControlAction, allGrps map[string]*quota.Group, meter progress.Meter, perfTimings *timings.Timings) error { 116 // make sure the group does not exist yet 117 if _, ok := allGrps[action.QuotaName]; ok { 118 return fmt.Errorf("group %q already exists", action.QuotaName) 119 } 120 121 // make sure the memory limit is not zero 122 // TODO: this needs to be updated to 4K when PR snapcore/snapd#10346 lands 123 // and an equivalent check needs to be put back into CreateQuota() before 124 // the tasks are created 125 if action.MemoryLimit == 0 { 126 return fmt.Errorf("internal error, MemoryLimit option is mandatory for create action") 127 } 128 129 // make sure the memory limit is at least 4K, that is the minimum size 130 // to allow nesting, otherwise groups with less than 4K will trigger the 131 // oom killer to be invoked when a new group is added as a sub-group to the 132 // larger group. 133 if action.MemoryLimit <= 4*quantity.SizeKiB { 134 return fmt.Errorf("memory limit for group %q is too small: size must be larger than 4KB", action.QuotaName) 135 } 136 137 // make sure the specified snaps exist and aren't currently in another group 138 if err := validateSnapForAddingToGroup(st, action.AddSnaps, action.QuotaName, allGrps); err != nil { 139 return err 140 } 141 142 grp, allGrps, err := quotaCreateImpl(st, action, allGrps) 143 if err != nil { 144 return err 145 } 146 147 // ensure the snap services with the group 148 opts := &ensureSnapServicesForGroupOptions{ 149 allGrps: allGrps, 150 } 151 return ensureSnapServicesForGroup(st, t, grp, opts, meter, perfTimings) 152 } 153 154 func quotaCreateImpl(st *state.State, action QuotaControlAction, allGrps map[string]*quota.Group) (*quota.Group, map[string]*quota.Group, error) { 155 // make sure that the parent group exists if we are creating a sub-group 156 var grp *quota.Group 157 var err error 158 updatedGrps := []*quota.Group{} 159 if action.ParentName != "" { 160 parentGrp, ok := allGrps[action.ParentName] 161 if !ok { 162 return nil, nil, fmt.Errorf("cannot create group under non-existent parent group %q", action.ParentName) 163 } 164 165 grp, err = parentGrp.NewSubGroup(action.QuotaName, action.MemoryLimit) 166 if err != nil { 167 return nil, nil, err 168 } 169 170 updatedGrps = append(updatedGrps, parentGrp) 171 } else { 172 // make a new group 173 grp, err = quota.NewGroup(action.QuotaName, action.MemoryLimit) 174 if err != nil { 175 return nil, nil, err 176 } 177 } 178 updatedGrps = append(updatedGrps, grp) 179 180 // put the snaps in the group 181 grp.Snaps = action.AddSnaps 182 // update the modified groups in state 183 newAllGrps, err := patchQuotas(st, updatedGrps...) 184 if err != nil { 185 return nil, nil, err 186 } 187 188 return grp, newAllGrps, nil 189 } 190 191 func quotaRemove(st *state.State, t *state.Task, action QuotaControlAction, allGrps map[string]*quota.Group, meter progress.Meter, perfTimings *timings.Timings) error { 192 // make sure the group exists 193 grp, ok := allGrps[action.QuotaName] 194 if !ok { 195 return fmt.Errorf("cannot remove non-existent quota group %q", action.QuotaName) 196 } 197 198 // make sure some of the options are not set, it's an internal error if 199 // anything other than the name and action are set for a removal 200 if action.ParentName != "" { 201 return fmt.Errorf("internal error, ParentName option cannot be used with remove action") 202 } 203 204 if len(action.AddSnaps) != 0 { 205 return fmt.Errorf("internal error, AddSnaps option cannot be used with remove action") 206 } 207 208 if action.MemoryLimit != 0 { 209 return fmt.Errorf("internal error, MemoryLimit option cannot be used with remove action") 210 } 211 212 // XXX: remove this limitation eventually 213 if len(grp.SubGroups) != 0 { 214 return fmt.Errorf("cannot remove quota group with sub-groups, remove the sub-groups first") 215 } 216 217 // if this group has a parent, we need to remove the linkage to this 218 // sub-group from the parent first 219 if grp.ParentGroup != "" { 220 // the parent here must exist otherwise AllQuotas would have failed 221 // because state would have been inconsistent 222 parent := allGrps[grp.ParentGroup] 223 224 // ensure that the parent group of this group no longer mentions this 225 // group as a sub-group - we know that it must since AllQuotas validated 226 // the state for us 227 if len(parent.SubGroups) == 1 { 228 // this group was an only child, so clear the whole list 229 parent.SubGroups = nil 230 } else { 231 // we have to delete the child but keep the other children 232 newSubgroups := make([]string, 0, len(parent.SubGroups)-1) 233 for _, sub := range parent.SubGroups { 234 if sub != action.QuotaName { 235 newSubgroups = append(newSubgroups, sub) 236 } 237 } 238 239 parent.SubGroups = newSubgroups 240 } 241 242 allGrps[grp.ParentGroup] = parent 243 } 244 245 // now delete the group from state - do this first for convenience to ensure 246 // that we can just use SnapServiceOptions below and since it operates via 247 // state, it will immediately reflect the deletion 248 delete(allGrps, action.QuotaName) 249 250 // make sure that the group set is consistent before saving it - we may need 251 // to delete old links from this group's parent to the child 252 if err := quota.ResolveCrossReferences(allGrps); err != nil { 253 return fmt.Errorf("cannot remove quota %q: %v", action.QuotaName, err) 254 } 255 256 // now set it in state 257 st.Set("quotas", allGrps) 258 259 // update snap service units that may need to be re-written because they are 260 // not in a slice anymore 261 opts := &ensureSnapServicesForGroupOptions{ 262 allGrps: allGrps, 263 } 264 return ensureSnapServicesForGroup(st, t, grp, opts, meter, perfTimings) 265 } 266 267 func quotaUpdate(st *state.State, t *state.Task, action QuotaControlAction, allGrps map[string]*quota.Group, meter progress.Meter, perfTimings *timings.Timings) error { 268 // make sure the group exists 269 grp, ok := allGrps[action.QuotaName] 270 if !ok { 271 return fmt.Errorf("group %q does not exist", action.QuotaName) 272 } 273 274 // check that ParentName is not set, since we don't currently support 275 // re-parenting 276 if action.ParentName != "" { 277 return fmt.Errorf("group %q cannot be moved to a different parent (re-parenting not yet supported)", action.QuotaName) 278 } 279 280 modifiedGrps := []*quota.Group{grp} 281 282 // now ensure that all of the snaps mentioned in AddSnaps exist as snaps and 283 // that they aren't already in an existing quota group 284 if err := validateSnapForAddingToGroup(st, action.AddSnaps, action.QuotaName, allGrps); err != nil { 285 return err 286 } 287 288 // append the snaps list in the group 289 grp.Snaps = append(grp.Snaps, action.AddSnaps...) 290 291 // if the memory limit is not zero then change it too 292 if action.MemoryLimit != 0 { 293 // we disallow decreasing the memory limit because it is difficult to do 294 // so correctly with the current state of our code in 295 // EnsureSnapServices, see comment in ensureSnapServicesForGroup for 296 // full details 297 if action.MemoryLimit < grp.MemoryLimit { 298 return fmt.Errorf("cannot decrease memory limit of existing quota-group, remove and re-create it to decrease the limit") 299 } 300 grp.MemoryLimit = action.MemoryLimit 301 } 302 303 // update the quota group state 304 allGrps, err := patchQuotas(st, modifiedGrps...) 305 if err != nil { 306 return err 307 } 308 309 // ensure service states are updated 310 opts := &ensureSnapServicesForGroupOptions{ 311 allGrps: allGrps, 312 } 313 return ensureSnapServicesForGroup(st, t, grp, opts, meter, perfTimings) 314 } 315 316 type ensureSnapServicesForGroupOptions struct { 317 // allGrps is the updated set of quota groups 318 allGrps map[string]*quota.Group 319 320 // extraSnaps is the set of extra snaps to consider when ensuring services, 321 // mainly only used when snaps are removed from quota groups 322 extraSnaps []string 323 } 324 325 // ensureSnapServicesForGroup will handle updating changes to a given quota 326 // group on disk, including re-generating systemd slice files, restarting snap 327 // services that have moved into or out of quota groups, as well as starting 328 // newly created quota groups and stopping and removing removed quota groups. 329 // This function is idempotent, in that it can be called multiple times with 330 // the same changes to be processed and nothing will be broken. This is mainly 331 // a consequence of calling wrappers.EnsureSnapServices(). 332 // Currently, it only supports handling a single group change. 333 func ensureSnapServicesForGroup(st *state.State, t *state.Task, grp *quota.Group, opts *ensureSnapServicesForGroupOptions, meter progress.Meter, perfTimings *timings.Timings) error { 334 if opts == nil { 335 return fmt.Errorf("internal error: unset group information for ensuring") 336 } 337 338 allGrps := opts.allGrps 339 340 if meter == nil { 341 meter = progress.Null 342 } 343 344 if perfTimings == nil { 345 perfTimings = &timings.Timings{} 346 } 347 348 // extraSnaps []string, meter progress.Meter, perfTimings *timings.Timings 349 // build the map of snap infos to options to provide to EnsureSnapServices 350 snapSvcMap := map[*snap.Info]*wrappers.SnapServiceOptions{} 351 for _, sn := range append(grp.Snaps, opts.extraSnaps...) { 352 info, err := snapstate.CurrentInfo(st, sn) 353 if err != nil { 354 return err 355 } 356 357 opts, err := SnapServiceOptions(st, sn, allGrps) 358 if err != nil { 359 return err 360 } 361 362 snapSvcMap[info] = opts 363 } 364 365 // TODO: the following lines should maybe be EnsureOptionsForDevice() or 366 // something since it is duplicated a few places 367 ensureOpts := &wrappers.EnsureSnapServicesOptions{ 368 Preseeding: snapdenv.Preseeding(), 369 } 370 371 // set RequireMountedSnapdSnap if we are on UC18+ only 372 deviceCtx, err := snapstate.DeviceCtx(st, nil, nil) 373 if err != nil { 374 return err 375 } 376 377 if !deviceCtx.Classic() && deviceCtx.Model().Base() != "" { 378 ensureOpts.RequireMountedSnapdSnap = true 379 } 380 381 grpsToStart := []*quota.Group{} 382 appsToRestartBySnap := map[*snap.Info][]*snap.AppInfo{} 383 384 collectModifiedUnits := func(app *snap.AppInfo, grp *quota.Group, unitType string, name, old, new string) { 385 switch unitType { 386 case "slice": 387 // this slice was either modified or written for the first time 388 389 // There are currently 3 possible cases that have different 390 // operations required, but we ignore one of them, so there really 391 // are just 2 cases we care about: 392 // 1. If this slice was initially written, we just need to systemctl 393 // start it 394 // 2. If the slice was modified to be given more resources (i.e. a 395 // higher memory limit), then we just need to do a daemon-reload 396 // which causes systemd to modify the cgroup which will always 397 // work since a cgroup can be atomically given more resources 398 // without issue since the cgroup can't be using more than the 399 // current limit. 400 // 3. If the slice was modified to be given _less_ resources (i.e. a 401 // lower memory limit), then we need to stop the services before 402 // issuing the daemon-reload to systemd, then do the 403 // daemon-reload which will succeed in modifying the cgroup, then 404 // start the services we stopped back up again. This is because 405 // otherwise if the services are currently running and using more 406 // resources than they would be allowed after the modification is 407 // applied by systemd to the cgroup, the kernel responds with 408 // EBUSY, and it isn't clear if the modification is then properly 409 // in place or not. 410 // 411 // We will already have called daemon-reload at the end of 412 // EnsureSnapServices directly, so handling case 3 is difficult, and 413 // for now we disallow making this sort of change to a quota group, 414 // that logic is handled at a higher level than this function. 415 // Thus the only decision we really have to make is if the slice was 416 // newly written or not, and if it was save it for later 417 if old == "" { 418 grpsToStart = append(grpsToStart, grp) 419 } 420 421 case "service": 422 // in this case, the only way that a service could have been changed 423 // was if it was moved into or out of a slice, in both cases we need 424 // to restart the service 425 sn := app.Snap 426 appsToRestartBySnap[sn] = append(appsToRestartBySnap[sn], app) 427 428 // TODO: what about sockets and timers? activation units just start 429 // the full unit, so as long as the full unit is restarted we should 430 // be okay? 431 } 432 } 433 if err := wrappers.EnsureSnapServices(snapSvcMap, ensureOpts, collectModifiedUnits, meter); err != nil { 434 return err 435 } 436 437 if ensureOpts.Preseeding { 438 return nil 439 } 440 441 // TODO: should this logic move to wrappers in wrappers.RemoveQuotaGroup()? 442 systemSysd := systemd.New(systemd.SystemMode, meter) 443 444 // now start the slices 445 for _, grp := range grpsToStart { 446 // TODO: what should these timeouts for stopping/restart slices be? 447 if err := systemSysd.Start(grp.SliceFileName()); err != nil { 448 return err 449 } 450 } 451 452 // after starting all the grps that we modified from EnsureSnapServices, 453 // we need to handle the case where a quota was removed, this will only 454 // happen one at a time and can be identified by the grp provided to us 455 // not existing in the state 456 if _, ok := allGrps[grp.Name]; !ok { 457 // stop the quota group, then remove it 458 if !ensureOpts.Preseeding { 459 if err := systemSysd.Stop(grp.SliceFileName(), 5*time.Second); err != nil { 460 logger.Noticef("unable to stop systemd slice while removing group %q: %v", grp.Name, err) 461 } 462 } 463 464 // TODO: this results in a second systemctl daemon-reload which is 465 // undesirable, we should figure out how to do this operation with a 466 // single daemon-reload 467 err := wrappers.RemoveQuotaGroup(grp, meter) 468 if err != nil { 469 return err 470 } 471 } 472 473 // after we have made all the persistent modifications to disk and state, 474 // set the task as done, what remains for this task handler is just to 475 // restart services which will happen regardless if we get rebooted after 476 // unlocking the state - if we got rebooted before unlocking the state, none 477 // of the changes we made to state would be persisted and we would run 478 // through everything above here again, but the second time around 479 // EnsureSnapServices would end up doing nothing since it is idempotent. 480 if t != nil { 481 t.SetStatus(state.DoneStatus) 482 } 483 484 // now restart the services for each snap that was newly moved into a quota 485 // group 486 487 // iterate in a sorted order over the snaps to restart their apps for easy 488 // tests 489 snaps := make([]*snap.Info, 0, len(appsToRestartBySnap)) 490 for sn := range appsToRestartBySnap { 491 snaps = append(snaps, sn) 492 } 493 494 sort.Slice(snaps, func(i, j int) bool { 495 return snaps[i].InstanceName() < snaps[j].InstanceName() 496 }) 497 498 for _, sn := range snaps { 499 startupOrdered, err := snap.SortServices(appsToRestartBySnap[sn]) 500 if err != nil { 501 return err 502 } 503 504 st.Unlock() 505 err = wrappers.RestartServices(startupOrdered, nil, nil, meter, perfTimings) 506 st.Lock() 507 508 if err != nil { 509 return err 510 } 511 } 512 return nil 513 } 514 515 func validateSnapForAddingToGroup(st *state.State, snaps []string, group string, allGrps map[string]*quota.Group) error { 516 for _, name := range snaps { 517 // validate that the snap exists 518 _, err := snapstate.CurrentInfo(st, name) 519 if err != nil { 520 return fmt.Errorf("cannot use snap %q in group %q: %v", name, group, err) 521 } 522 523 // check that the snap is not already in a group 524 for _, grp := range allGrps { 525 if strutil.ListContains(grp.Snaps, name) { 526 return fmt.Errorf("cannot add snap %q to group %q: snap already in quota group %q", name, group, grp.Name) 527 } 528 } 529 } 530 531 return nil 532 }