github.com/david-imola/snapd@v0.0.0-20210611180407-2de8ddeece6d/overlord/servicestate/quota_handlers.go (about) 1 // -*- Mode: Go; indent-tabs-mode: t -*- 2 3 /* 4 * Copyright (C) 2021 Canonical Ltd 5 * 6 * This program is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 3 as 8 * published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 * 18 */ 19 20 package servicestate 21 22 import ( 23 "fmt" 24 "sort" 25 "time" 26 27 tomb "gopkg.in/tomb.v2" 28 29 "github.com/snapcore/snapd/gadget/quantity" 30 "github.com/snapcore/snapd/logger" 31 "github.com/snapcore/snapd/overlord/snapstate" 32 "github.com/snapcore/snapd/overlord/state" 33 "github.com/snapcore/snapd/progress" 34 "github.com/snapcore/snapd/snap" 35 "github.com/snapcore/snapd/snap/quota" 36 "github.com/snapcore/snapd/snapdenv" 37 "github.com/snapcore/snapd/strutil" 38 "github.com/snapcore/snapd/systemd" 39 "github.com/snapcore/snapd/timings" 40 "github.com/snapcore/snapd/wrappers" 41 ) 42 43 // QuotaControlAction is the serialized representation of a quota group 44 // modification that lives in a task. 45 type QuotaControlAction struct { 46 // QuotaName is the name of the quota group being controlled. 47 QuotaName string `json:"quota-name"` 48 49 // Action is the action being taken on the quota group. It can be either 50 // "create", "update", or "remove". 51 Action string `json:"action"` 52 53 // AddSnaps is the set of snaps to add to the quota group, valid for either 54 // the "update" or the "create" actions. 55 AddSnaps []string `json:"snaps"` 56 57 // MemoryLimit is the memory limit for the quota group being controlled, 58 // either the initial limit the group is created with for the "create" 59 // action, or if non-zero for the "update" the memory limit, then the new 60 // value to be set. 61 MemoryLimit quantity.Size 62 63 // ParentName is the name of the parent for the quota group if it is being 64 // created. Eventually this could be used with the "update" action to 65 // support moving quota groups from one parent to another, but that is 66 // currently not supported. 67 ParentName string 68 } 69 70 func (m *ServiceManager) doQuotaControl(t *state.Task, _ *tomb.Tomb) error { 71 st := t.State() 72 st.Lock() 73 defer st.Unlock() 74 75 perfTimings := state.TimingsForTask(t) 76 defer perfTimings.Save(st) 77 78 meter := snapstate.NewTaskProgressAdapterUnlocked(t) 79 80 qcs := []QuotaControlAction{} 81 err := t.Get("quota-control-actions", &qcs) 82 if err != nil { 83 return fmt.Errorf("internal error: cannot get quota-control-action: %v", err) 84 } 85 86 // TODO: support more than one action 87 switch { 88 case len(qcs) > 1: 89 return fmt.Errorf("multiple quota group actions not supported yet") 90 case len(qcs) == 0: 91 return fmt.Errorf("internal error: no quota group actions for quota-control task") 92 } 93 94 qc := qcs[0] 95 96 allGrps, err := AllQuotas(st) 97 if err != nil { 98 return err 99 } 100 101 switch qc.Action { 102 case "create": 103 err = quotaCreate(st, t, qc, allGrps, meter, perfTimings) 104 case "remove": 105 err = quotaRemove(st, t, qc, allGrps, meter, perfTimings) 106 case "update": 107 err = quotaUpdate(st, t, qc, allGrps, meter, perfTimings) 108 default: 109 err = fmt.Errorf("unknown action %q requested", qc.Action) 110 } 111 112 return err 113 } 114 115 func quotaCreate(st *state.State, t *state.Task, action QuotaControlAction, allGrps map[string]*quota.Group, meter progress.Meter, perfTimings *timings.Timings) error { 116 // make sure the group does not exist yet 117 if _, ok := allGrps[action.QuotaName]; ok { 118 return fmt.Errorf("group %q already exists", action.QuotaName) 119 } 120 121 // make sure the memory limit is not zero 122 // TODO: this needs to be updated to 4K when PR snapcore/snapd#10346 lands 123 // and an equivalent check needs to be put back into CreateQuota() before 124 // the tasks are created 125 if action.MemoryLimit == 0 { 126 return fmt.Errorf("internal error, MemoryLimit option is mandatory for create action") 127 } 128 129 // make sure the specified snaps exist and aren't currently in another group 130 if err := validateSnapForAddingToGroup(st, action.AddSnaps, action.QuotaName, allGrps); err != nil { 131 return err 132 } 133 134 grp, allGrps, err := quotaCreateImpl(st, action, allGrps) 135 if err != nil { 136 return err 137 } 138 139 // ensure the snap services with the group 140 opts := &ensureSnapServicesForGroupOptions{ 141 allGrps: allGrps, 142 } 143 return ensureSnapServicesForGroup(st, t, grp, opts, meter, perfTimings) 144 } 145 146 func quotaCreateImpl(st *state.State, action QuotaControlAction, allGrps map[string]*quota.Group) (*quota.Group, map[string]*quota.Group, error) { 147 // make sure that the parent group exists if we are creating a sub-group 148 var grp *quota.Group 149 var err error 150 updatedGrps := []*quota.Group{} 151 if action.ParentName != "" { 152 parentGrp, ok := allGrps[action.ParentName] 153 if !ok { 154 return nil, nil, fmt.Errorf("cannot create group under non-existent parent group %q", action.ParentName) 155 } 156 157 grp, err = parentGrp.NewSubGroup(action.QuotaName, action.MemoryLimit) 158 if err != nil { 159 return nil, nil, err 160 } 161 162 updatedGrps = append(updatedGrps, parentGrp) 163 } else { 164 // make a new group 165 grp, err = quota.NewGroup(action.QuotaName, action.MemoryLimit) 166 if err != nil { 167 return nil, nil, err 168 } 169 } 170 updatedGrps = append(updatedGrps, grp) 171 172 // put the snaps in the group 173 grp.Snaps = action.AddSnaps 174 // update the modified groups in state 175 newAllGrps, err := patchQuotas(st, updatedGrps...) 176 if err != nil { 177 return nil, nil, err 178 } 179 180 return grp, newAllGrps, nil 181 } 182 183 func quotaRemove(st *state.State, t *state.Task, action QuotaControlAction, allGrps map[string]*quota.Group, meter progress.Meter, perfTimings *timings.Timings) error { 184 // make sure the group exists 185 grp, ok := allGrps[action.QuotaName] 186 if !ok { 187 return fmt.Errorf("cannot remove non-existent quota group %q", action.QuotaName) 188 } 189 190 // make sure some of the options are not set, it's an internal error if 191 // anything other than the name and action are set for a removal 192 if action.ParentName != "" { 193 return fmt.Errorf("internal error, ParentName option cannot be used with remove action") 194 } 195 196 if len(action.AddSnaps) != 0 { 197 return fmt.Errorf("internal error, AddSnaps option cannot be used with remove action") 198 } 199 200 if action.MemoryLimit != 0 { 201 return fmt.Errorf("internal error, MemoryLimit option cannot be used with remove action") 202 } 203 204 // XXX: remove this limitation eventually 205 if len(grp.SubGroups) != 0 { 206 return fmt.Errorf("cannot remove quota group with sub-groups, remove the sub-groups first") 207 } 208 209 // if this group has a parent, we need to remove the linkage to this 210 // sub-group from the parent first 211 if grp.ParentGroup != "" { 212 // the parent here must exist otherwise AllQuotas would have failed 213 // because state would have been inconsistent 214 parent := allGrps[grp.ParentGroup] 215 216 // ensure that the parent group of this group no longer mentions this 217 // group as a sub-group - we know that it must since AllQuotas validated 218 // the state for us 219 if len(parent.SubGroups) == 1 { 220 // this group was an only child, so clear the whole list 221 parent.SubGroups = nil 222 } else { 223 // we have to delete the child but keep the other children 224 newSubgroups := make([]string, 0, len(parent.SubGroups)-1) 225 for _, sub := range parent.SubGroups { 226 if sub != action.QuotaName { 227 newSubgroups = append(newSubgroups, sub) 228 } 229 } 230 231 parent.SubGroups = newSubgroups 232 } 233 234 allGrps[grp.ParentGroup] = parent 235 } 236 237 // now delete the group from state - do this first for convenience to ensure 238 // that we can just use SnapServiceOptions below and since it operates via 239 // state, it will immediately reflect the deletion 240 delete(allGrps, action.QuotaName) 241 242 // make sure that the group set is consistent before saving it - we may need 243 // to delete old links from this group's parent to the child 244 if err := quota.ResolveCrossReferences(allGrps); err != nil { 245 return fmt.Errorf("cannot remove quota %q: %v", action.QuotaName, err) 246 } 247 248 // now set it in state 249 st.Set("quotas", allGrps) 250 251 // update snap service units that may need to be re-written because they are 252 // not in a slice anymore 253 opts := &ensureSnapServicesForGroupOptions{ 254 allGrps: allGrps, 255 } 256 return ensureSnapServicesForGroup(st, t, grp, opts, meter, perfTimings) 257 } 258 259 func quotaUpdate(st *state.State, t *state.Task, action QuotaControlAction, allGrps map[string]*quota.Group, meter progress.Meter, perfTimings *timings.Timings) error { 260 // make sure the group exists 261 grp, ok := allGrps[action.QuotaName] 262 if !ok { 263 return fmt.Errorf("group %q does not exist", action.QuotaName) 264 } 265 266 // check that ParentName is not set, since we don't currently support 267 // re-parenting 268 if action.ParentName != "" { 269 return fmt.Errorf("group %q cannot be moved to a different parent (re-parenting not yet supported)", action.QuotaName) 270 } 271 272 modifiedGrps := []*quota.Group{grp} 273 274 // now ensure that all of the snaps mentioned in AddSnaps exist as snaps and 275 // that they aren't already in an existing quota group 276 if err := validateSnapForAddingToGroup(st, action.AddSnaps, action.QuotaName, allGrps); err != nil { 277 return err 278 } 279 280 // append the snaps list in the group 281 grp.Snaps = append(grp.Snaps, action.AddSnaps...) 282 283 // if the memory limit is not zero then change it too 284 if action.MemoryLimit != 0 { 285 // we disallow decreasing the memory limit because it is difficult to do 286 // so correctly with the current state of our code in 287 // EnsureSnapServices, see comment in ensureSnapServicesForGroup for 288 // full details 289 if action.MemoryLimit < grp.MemoryLimit { 290 return fmt.Errorf("cannot decrease memory limit of existing quota-group, remove and re-create it to decrease the limit") 291 } 292 grp.MemoryLimit = action.MemoryLimit 293 } 294 295 // update the quota group state 296 allGrps, err := patchQuotas(st, modifiedGrps...) 297 if err != nil { 298 return err 299 } 300 301 // ensure service states are updated 302 opts := &ensureSnapServicesForGroupOptions{ 303 allGrps: allGrps, 304 } 305 return ensureSnapServicesForGroup(st, t, grp, opts, meter, perfTimings) 306 } 307 308 type ensureSnapServicesForGroupOptions struct { 309 // allGrps is the updated set of quota groups 310 allGrps map[string]*quota.Group 311 312 // extraSnaps is the set of extra snaps to consider when ensuring services, 313 // mainly only used when snaps are removed from quota groups 314 extraSnaps []string 315 } 316 317 // ensureSnapServicesForGroup will handle updating changes to a given quota 318 // group on disk, including re-generating systemd slice files, restarting snap 319 // services that have moved into or out of quota groups, as well as starting 320 // newly created quota groups and stopping and removing removed quota groups. 321 // This function is idempotent, in that it can be called multiple times with 322 // the same changes to be processed and nothing will be broken. This is mainly 323 // a consequence of calling wrappers.EnsureSnapServices(). 324 // Currently, it only supports handling a single group change. 325 func ensureSnapServicesForGroup(st *state.State, t *state.Task, grp *quota.Group, opts *ensureSnapServicesForGroupOptions, meter progress.Meter, perfTimings *timings.Timings) error { 326 if opts == nil { 327 return fmt.Errorf("internal error: unset group information for ensuring") 328 } 329 330 allGrps := opts.allGrps 331 332 if meter == nil { 333 meter = progress.Null 334 } 335 336 if perfTimings == nil { 337 perfTimings = &timings.Timings{} 338 } 339 340 // extraSnaps []string, meter progress.Meter, perfTimings *timings.Timings 341 // build the map of snap infos to options to provide to EnsureSnapServices 342 snapSvcMap := map[*snap.Info]*wrappers.SnapServiceOptions{} 343 for _, sn := range append(grp.Snaps, opts.extraSnaps...) { 344 info, err := snapstate.CurrentInfo(st, sn) 345 if err != nil { 346 return err 347 } 348 349 opts, err := SnapServiceOptions(st, sn, allGrps) 350 if err != nil { 351 return err 352 } 353 354 snapSvcMap[info] = opts 355 } 356 357 // TODO: the following lines should maybe be EnsureOptionsForDevice() or 358 // something since it is duplicated a few places 359 ensureOpts := &wrappers.EnsureSnapServicesOptions{ 360 Preseeding: snapdenv.Preseeding(), 361 } 362 363 // set RequireMountedSnapdSnap if we are on UC18+ only 364 deviceCtx, err := snapstate.DeviceCtx(st, nil, nil) 365 if err != nil { 366 return err 367 } 368 369 if !deviceCtx.Classic() && deviceCtx.Model().Base() != "" { 370 ensureOpts.RequireMountedSnapdSnap = true 371 } 372 373 grpsToStart := []*quota.Group{} 374 appsToRestartBySnap := map[*snap.Info][]*snap.AppInfo{} 375 376 collectModifiedUnits := func(app *snap.AppInfo, grp *quota.Group, unitType string, name, old, new string) { 377 switch unitType { 378 case "slice": 379 // this slice was either modified or written for the first time 380 381 // There are currently 3 possible cases that have different 382 // operations required, but we ignore one of them, so there really 383 // are just 2 cases we care about: 384 // 1. If this slice was initially written, we just need to systemctl 385 // start it 386 // 2. If the slice was modified to be given more resources (i.e. a 387 // higher memory limit), then we just need to do a daemon-reload 388 // which causes systemd to modify the cgroup which will always 389 // work since a cgroup can be atomically given more resources 390 // without issue since the cgroup can't be using more than the 391 // current limit. 392 // 3. If the slice was modified to be given _less_ resources (i.e. a 393 // lower memory limit), then we need to stop the services before 394 // issuing the daemon-reload to systemd, then do the 395 // daemon-reload which will succeed in modifying the cgroup, then 396 // start the services we stopped back up again. This is because 397 // otherwise if the services are currently running and using more 398 // resources than they would be allowed after the modification is 399 // applied by systemd to the cgroup, the kernel responds with 400 // EBUSY, and it isn't clear if the modification is then properly 401 // in place or not. 402 // 403 // We will already have called daemon-reload at the end of 404 // EnsureSnapServices directly, so handling case 3 is difficult, and 405 // for now we disallow making this sort of change to a quota group, 406 // that logic is handled at a higher level than this function. 407 // Thus the only decision we really have to make is if the slice was 408 // newly written or not, and if it was save it for later 409 if old == "" { 410 grpsToStart = append(grpsToStart, grp) 411 } 412 413 case "service": 414 // in this case, the only way that a service could have been changed 415 // was if it was moved into or out of a slice, in both cases we need 416 // to restart the service 417 sn := app.Snap 418 appsToRestartBySnap[sn] = append(appsToRestartBySnap[sn], app) 419 420 // TODO: what about sockets and timers? activation units just start 421 // the full unit, so as long as the full unit is restarted we should 422 // be okay? 423 } 424 } 425 if err := wrappers.EnsureSnapServices(snapSvcMap, ensureOpts, collectModifiedUnits, meter); err != nil { 426 return err 427 } 428 429 if ensureOpts.Preseeding { 430 return nil 431 } 432 433 // TODO: should this logic move to wrappers in wrappers.RemoveQuotaGroup()? 434 systemSysd := systemd.New(systemd.SystemMode, meter) 435 436 // now start the slices 437 for _, grp := range grpsToStart { 438 // TODO: what should these timeouts for stopping/restart slices be? 439 if err := systemSysd.Start(grp.SliceFileName()); err != nil { 440 return err 441 } 442 } 443 444 // after starting all the grps that we modified from EnsureSnapServices, 445 // we need to handle the case where a quota was removed, this will only 446 // happen one at a time and can be identified by the grp provided to us 447 // not existing in the state 448 if _, ok := allGrps[grp.Name]; !ok { 449 // stop the quota group, then remove it 450 if !ensureOpts.Preseeding { 451 if err := systemSysd.Stop(grp.SliceFileName(), 5*time.Second); err != nil { 452 logger.Noticef("unable to stop systemd slice while removing group %q: %v", grp.Name, err) 453 } 454 } 455 456 // TODO: this results in a second systemctl daemon-reload which is 457 // undesirable, we should figure out how to do this operation with a 458 // single daemon-reload 459 err := wrappers.RemoveQuotaGroup(grp, meter) 460 if err != nil { 461 return err 462 } 463 } 464 465 // after we have made all the persistent modifications to disk and state, 466 // set the task as done, what remains for this task handler is just to 467 // restart services which will happen regardless if we get rebooted after 468 // unlocking the state - if we got rebooted before unlocking the state, none 469 // of the changes we made to state would be persisted and we would run 470 // through everything above here again, but the second time around 471 // EnsureSnapServices would end up doing nothing since it is idempotent. 472 if t != nil { 473 t.SetStatus(state.DoneStatus) 474 } 475 476 // now restart the services for each snap that was newly moved into a quota 477 // group 478 479 // iterate in a sorted order over the snaps to restart their apps for easy 480 // tests 481 snaps := make([]*snap.Info, 0, len(appsToRestartBySnap)) 482 for sn := range appsToRestartBySnap { 483 snaps = append(snaps, sn) 484 } 485 486 sort.Slice(snaps, func(i, j int) bool { 487 return snaps[i].InstanceName() < snaps[j].InstanceName() 488 }) 489 490 for _, sn := range snaps { 491 st.Unlock() 492 disabledSvcs, err := wrappers.QueryDisabledServices(sn, meter) 493 st.Lock() 494 if err != nil { 495 return err 496 } 497 498 isDisabledSvc := make(map[string]bool, len(disabledSvcs)) 499 for _, svc := range disabledSvcs { 500 isDisabledSvc[svc] = true 501 } 502 503 startupOrdered, err := snap.SortServices(appsToRestartBySnap[sn]) 504 if err != nil { 505 return err 506 } 507 508 // drop disabled services from the startup ordering 509 startupOrderedMinusDisabled := make([]*snap.AppInfo, 0, len(startupOrdered)-len(disabledSvcs)) 510 511 for _, svc := range startupOrdered { 512 if !isDisabledSvc[svc.ServiceName()] { 513 startupOrderedMinusDisabled = append(startupOrderedMinusDisabled, svc) 514 } 515 } 516 517 st.Unlock() 518 err = wrappers.RestartServices(startupOrderedMinusDisabled, nil, meter, perfTimings) 519 st.Lock() 520 521 if err != nil { 522 return err 523 } 524 } 525 return nil 526 } 527 528 func validateSnapForAddingToGroup(st *state.State, snaps []string, group string, allGrps map[string]*quota.Group) error { 529 for _, name := range snaps { 530 // validate that the snap exists 531 _, err := snapstate.CurrentInfo(st, name) 532 if err != nil { 533 return fmt.Errorf("cannot use snap %q in group %q: %v", name, group, err) 534 } 535 536 // check that the snap is not already in a group 537 for _, grp := range allGrps { 538 if strutil.ListContains(grp.Snaps, name) { 539 return fmt.Errorf("cannot add snap %q to group %q: snap already in quota group %q", name, group, grp.Name) 540 } 541 } 542 } 543 544 return nil 545 }