github.com/Lephar/snapd@v0.0.0-20210825215435-c7fba9cef4d2/overlord/servicestate/quota_handlers.go (about) 1 // -*- Mode: Go; indent-tabs-mode: t -*- 2 3 /* 4 * Copyright (C) 2021 Canonical Ltd 5 * 6 * This program is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 3 as 8 * published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 * 18 */ 19 20 package servicestate 21 22 import ( 23 "fmt" 24 "sort" 25 "time" 26 27 tomb "gopkg.in/tomb.v2" 28 29 "github.com/snapcore/snapd/gadget/quantity" 30 "github.com/snapcore/snapd/logger" 31 "github.com/snapcore/snapd/osutil" 32 "github.com/snapcore/snapd/overlord/servicestate/internal" 33 "github.com/snapcore/snapd/overlord/snapstate" 34 "github.com/snapcore/snapd/overlord/state" 35 "github.com/snapcore/snapd/progress" 36 "github.com/snapcore/snapd/snap" 37 "github.com/snapcore/snapd/snap/quota" 38 "github.com/snapcore/snapd/snapdenv" 39 "github.com/snapcore/snapd/strutil" 40 "github.com/snapcore/snapd/systemd" 41 "github.com/snapcore/snapd/timings" 42 "github.com/snapcore/snapd/wrappers" 43 ) 44 45 // QuotaControlAction is the serialized representation of a quota group 46 // modification that lives in a task. 47 type QuotaControlAction struct { 48 // QuotaName is the name of the quota group being controlled. 49 QuotaName string `json:"quota-name"` 50 51 // Action is the action being taken on the quota group. It can be either 52 // "create", "update", or "remove". 53 Action string `json:"action"` 54 55 // AddSnaps is the set of snaps to add to the quota group, valid for either 56 // the "update" or the "create" actions. 57 AddSnaps []string `json:"snaps"` 58 59 // MemoryLimit is the memory limit for the quota group being controlled, 60 // either the initial limit the group is created with for the "create" 61 // action, or if non-zero for the "update" the memory limit, then the new 62 // value to be set. 63 MemoryLimit quantity.Size 64 65 // ParentName is the name of the parent for the quota group if it is being 66 // created. Eventually this could be used with the "update" action to 67 // support moving quota groups from one parent to another, but that is 68 // currently not supported. 69 ParentName string 70 } 71 72 func (m *ServiceManager) doQuotaControl(t *state.Task, _ *tomb.Tomb) error { 73 st := t.State() 74 st.Lock() 75 defer st.Unlock() 76 77 perfTimings := state.TimingsForTask(t) 78 defer perfTimings.Save(st) 79 80 qcs := []QuotaControlAction{} 81 err := t.Get("quota-control-actions", &qcs) 82 if err != nil { 83 return fmt.Errorf("internal error: cannot get quota-control-actions: %v", err) 84 } 85 86 // TODO: support more than one action 87 switch { 88 case len(qcs) > 1: 89 return fmt.Errorf("multiple quota group actions not supported yet") 90 case len(qcs) == 0: 91 return fmt.Errorf("internal error: no quota group actions for quota-control task") 92 } 93 94 qc := qcs[0] 95 96 updated, appsToRestartBySnap, err := quotaStateAlreadyUpdated(t) 97 if err != nil { 98 return err 99 } 100 101 if !updated { 102 allGrps, err := AllQuotas(st) 103 if err != nil { 104 return err 105 } 106 107 var grp *quota.Group 108 switch qc.Action { 109 case "create": 110 grp, allGrps, err = quotaCreate(st, qc, allGrps) 111 case "remove": 112 grp, allGrps, err = quotaRemove(st, qc, allGrps) 113 case "update": 114 grp, allGrps, err = quotaUpdate(st, qc, allGrps) 115 default: 116 return fmt.Errorf("unknown action %q requested", qc.Action) 117 } 118 119 if err != nil { 120 return err 121 } 122 123 // ensure service and slices on disk and their states are updated 124 opts := &ensureSnapServicesForGroupOptions{ 125 allGrps: allGrps, 126 } 127 appsToRestartBySnap, err = ensureSnapServicesForGroup(st, t, grp, opts) 128 if err != nil { 129 return err 130 } 131 132 // All persistent modifications to disk are made and the 133 // modifications to state will be committed by the 134 // unlocking in restartSnapServices. If snapd gets 135 // restarted before the end of this task, all the 136 // modifications would be redone, and those 137 // non-idempotent parts of the task would fail. 138 // For this reason we record together with the changes 139 // in state the fact that the changes were made, 140 // to avoid repeating them. 141 // What remains for this task handler is just to 142 // restart services which will happen regardless if we 143 // get rebooted after unlocking the state - if we got 144 // rebooted before unlocking the state, none of the 145 // changes we made to state would be persisted and we 146 // would run through everything above here again, but 147 // the second time around EnsureSnapServices would end 148 // up doing nothing since it is idempotent. So in the 149 // rare case that snapd gets restarted but is not a 150 // reboot also record which services do need 151 // restarting. There is a small chance that services 152 // will be restarted again but is preferable to the 153 // quota not applying to them. 154 if err := rememberQuotaStateUpdated(t, appsToRestartBySnap); err != nil { 155 return err 156 } 157 158 } 159 160 if err := restartSnapServices(st, t, appsToRestartBySnap, perfTimings); err != nil { 161 return err 162 } 163 t.SetStatus(state.DoneStatus) 164 return nil 165 } 166 167 var osutilBootID = osutil.BootID 168 169 type quotaStateUpdated struct { 170 BootID string `json:"boot-id"` 171 AppsToRestartBySnap map[string][]string `json:"apps-to-restart,omitempty"` 172 } 173 174 func rememberQuotaStateUpdated(t *state.Task, appsToRestartBySnap map[*snap.Info][]*snap.AppInfo) error { 175 bootID, err := osutilBootID() 176 if err != nil { 177 return err 178 } 179 appNamesBySnapName := make(map[string][]string, len(appsToRestartBySnap)) 180 for info, apps := range appsToRestartBySnap { 181 appNames := make([]string, len(apps)) 182 for i, app := range apps { 183 appNames[i] = app.Name 184 } 185 appNamesBySnapName[info.InstanceName()] = appNames 186 } 187 t.Set("state-updated", quotaStateUpdated{ 188 BootID: bootID, 189 AppsToRestartBySnap: appNamesBySnapName, 190 }) 191 return nil 192 } 193 194 func quotaStateAlreadyUpdated(t *state.Task) (ok bool, appsToRestartBySnap map[*snap.Info][]*snap.AppInfo, err error) { 195 var updated quotaStateUpdated 196 if err := t.Get("state-updated", &updated); err != nil { 197 if err == state.ErrNoState { 198 return false, nil, nil 199 } 200 return false, nil, err 201 } 202 203 bootID, err := osutilBootID() 204 if err != nil { 205 return false, nil, err 206 } 207 if bootID != updated.BootID { 208 // rebooted => nothing to restart 209 return true, nil, nil 210 } 211 212 appsToRestartBySnap = make(map[*snap.Info][]*snap.AppInfo, len(updated.AppsToRestartBySnap)) 213 st := t.State() 214 // best effort, ignore missing snaps and apps 215 for instanceName, appNames := range updated.AppsToRestartBySnap { 216 info, err := snapstate.CurrentInfo(st, instanceName) 217 if err != nil { 218 if _, ok := err.(*snap.NotInstalledError); ok { 219 t.Logf("after snapd restart, snap %q went missing", instanceName) 220 continue 221 } 222 return false, nil, err 223 } 224 apps := make([]*snap.AppInfo, 0, len(appNames)) 225 for _, appName := range appNames { 226 app := info.Apps[appName] 227 if app == nil || !app.IsService() { 228 continue 229 } 230 apps = append(apps, app) 231 } 232 appsToRestartBySnap[info] = apps 233 } 234 return true, appsToRestartBySnap, nil 235 } 236 237 func quotaCreate(st *state.State, action QuotaControlAction, allGrps map[string]*quota.Group) (*quota.Group, map[string]*quota.Group, error) { 238 // make sure the group does not exist yet 239 if _, ok := allGrps[action.QuotaName]; ok { 240 return nil, nil, fmt.Errorf("group %q already exists", action.QuotaName) 241 } 242 243 // make sure that the parent group exists if we are creating a sub-group 244 var parentGrp *quota.Group 245 if action.ParentName != "" { 246 var ok bool 247 parentGrp, ok = allGrps[action.ParentName] 248 if !ok { 249 return nil, nil, fmt.Errorf("cannot create group under non-existent parent group %q", action.ParentName) 250 } 251 } 252 253 // make sure the memory limit is not zero 254 if action.MemoryLimit == 0 { 255 return nil, nil, fmt.Errorf("internal error, MemoryLimit option is mandatory for create action") 256 } 257 258 // make sure the memory limit is at least 4K, that is the minimum size 259 // to allow nesting, otherwise groups with less than 4K will trigger the 260 // oom killer to be invoked when a new group is added as a sub-group to the 261 // larger group. 262 if action.MemoryLimit <= 4*quantity.SizeKiB { 263 return nil, nil, fmt.Errorf("memory limit for group %q is too small: size must be larger than 4KB", action.QuotaName) 264 } 265 266 // make sure the specified snaps exist and aren't currently in another group 267 if err := validateSnapForAddingToGroup(st, action.AddSnaps, action.QuotaName, allGrps); err != nil { 268 return nil, nil, err 269 } 270 271 return internal.CreateQuotaInState(st, action.QuotaName, parentGrp, action.AddSnaps, action.MemoryLimit, allGrps) 272 } 273 274 func quotaRemove(st *state.State, action QuotaControlAction, allGrps map[string]*quota.Group) (*quota.Group, map[string]*quota.Group, error) { 275 // make sure the group exists 276 grp, ok := allGrps[action.QuotaName] 277 if !ok { 278 return nil, nil, fmt.Errorf("cannot remove non-existent quota group %q", action.QuotaName) 279 } 280 281 // make sure some of the options are not set, it's an internal error if 282 // anything other than the name and action are set for a removal 283 if action.ParentName != "" { 284 return nil, nil, fmt.Errorf("internal error, ParentName option cannot be used with remove action") 285 } 286 287 if len(action.AddSnaps) != 0 { 288 return nil, nil, fmt.Errorf("internal error, AddSnaps option cannot be used with remove action") 289 } 290 291 if action.MemoryLimit != 0 { 292 return nil, nil, fmt.Errorf("internal error, MemoryLimit option cannot be used with remove action") 293 } 294 295 // XXX: remove this limitation eventually 296 if len(grp.SubGroups) != 0 { 297 return nil, nil, fmt.Errorf("cannot remove quota group with sub-groups, remove the sub-groups first") 298 } 299 300 // if this group has a parent, we need to remove the linkage to this 301 // sub-group from the parent first 302 if grp.ParentGroup != "" { 303 // the parent here must exist otherwise AllQuotas would have failed 304 // because state would have been inconsistent 305 parent := allGrps[grp.ParentGroup] 306 307 // ensure that the parent group of this group no longer mentions this 308 // group as a sub-group - we know that it must since AllQuotas validated 309 // the state for us 310 if len(parent.SubGroups) == 1 { 311 // this group was an only child, so clear the whole list 312 parent.SubGroups = nil 313 } else { 314 // we have to delete the child but keep the other children 315 newSubgroups := make([]string, 0, len(parent.SubGroups)-1) 316 for _, sub := range parent.SubGroups { 317 if sub != action.QuotaName { 318 newSubgroups = append(newSubgroups, sub) 319 } 320 } 321 322 parent.SubGroups = newSubgroups 323 } 324 325 allGrps[grp.ParentGroup] = parent 326 } 327 328 // now delete the group from state - do this first for convenience to ensure 329 // that we can just use SnapServiceOptions below and since it operates via 330 // state, it will immediately reflect the deletion 331 delete(allGrps, action.QuotaName) 332 333 // make sure that the group set is consistent before saving it - we may need 334 // to delete old links from this group's parent to the child 335 if err := quota.ResolveCrossReferences(allGrps); err != nil { 336 return nil, nil, fmt.Errorf("cannot remove quota group %q: %v", action.QuotaName, err) 337 } 338 339 // now set it in state 340 st.Set("quotas", allGrps) 341 342 return grp, allGrps, nil 343 } 344 345 func quotaUpdate(st *state.State, action QuotaControlAction, allGrps map[string]*quota.Group) (*quota.Group, map[string]*quota.Group, error) { 346 // make sure the group exists 347 grp, ok := allGrps[action.QuotaName] 348 if !ok { 349 return nil, nil, fmt.Errorf("group %q does not exist", action.QuotaName) 350 } 351 352 // check that ParentName is not set, since we don't currently support 353 // re-parenting 354 if action.ParentName != "" { 355 return nil, nil, fmt.Errorf("group %q cannot be moved to a different parent (re-parenting not yet supported)", action.QuotaName) 356 } 357 358 modifiedGrps := []*quota.Group{grp} 359 360 // now ensure that all of the snaps mentioned in AddSnaps exist as snaps and 361 // that they aren't already in an existing quota group 362 if err := validateSnapForAddingToGroup(st, action.AddSnaps, action.QuotaName, allGrps); err != nil { 363 return nil, nil, err 364 } 365 366 // append the snaps list in the group 367 grp.Snaps = append(grp.Snaps, action.AddSnaps...) 368 369 // if the memory limit is not zero then change it too 370 if action.MemoryLimit != 0 { 371 // we disallow decreasing the memory limit because it is difficult to do 372 // so correctly with the current state of our code in 373 // EnsureSnapServices, see comment in ensureSnapServicesForGroup for 374 // full details 375 if action.MemoryLimit < grp.MemoryLimit { 376 return nil, nil, fmt.Errorf("cannot decrease memory limit of existing quota-group, remove and re-create it to decrease the limit") 377 } 378 grp.MemoryLimit = action.MemoryLimit 379 } 380 381 // update the quota group state 382 allGrps, err := internal.PatchQuotas(st, modifiedGrps...) 383 if err != nil { 384 return nil, nil, err 385 } 386 return grp, allGrps, nil 387 } 388 389 type ensureSnapServicesForGroupOptions struct { 390 // allGrps is the updated set of quota groups 391 allGrps map[string]*quota.Group 392 393 // extraSnaps is the set of extra snaps to consider when ensuring services, 394 // mainly only used when snaps are removed from quota groups 395 extraSnaps []string 396 } 397 398 // ensureSnapServicesForGroup will handle updating changes to a given 399 // quota group on disk, including re-generating systemd slice files, 400 // as well as starting newly created quota groups and stopping and 401 // removing removed quota groups. 402 // It also computes and returns snap services that have moved into or 403 // out of quota groups and need restarting. 404 // This function is idempotent, in that it can be called multiple times with 405 // the same changes to be processed and nothing will be broken. This is mainly 406 // a consequence of calling wrappers.EnsureSnapServices(). 407 // Currently, it only supports handling a single group change. 408 // It returns the snap services that needs restarts. 409 func ensureSnapServicesForGroup(st *state.State, t *state.Task, grp *quota.Group, opts *ensureSnapServicesForGroupOptions) (appsToRestartBySnap map[*snap.Info][]*snap.AppInfo, err error) { 410 if opts == nil { 411 return nil, fmt.Errorf("internal error: unset group information for ensuring") 412 } 413 414 allGrps := opts.allGrps 415 416 var meterLocked progress.Meter 417 if t == nil { 418 meterLocked = progress.Null 419 } else { 420 meterLocked = snapstate.NewTaskProgressAdapterLocked(t) 421 } 422 423 // build the map of snap infos to options to provide to EnsureSnapServices 424 snapSvcMap := map[*snap.Info]*wrappers.SnapServiceOptions{} 425 for _, sn := range append(grp.Snaps, opts.extraSnaps...) { 426 info, err := snapstate.CurrentInfo(st, sn) 427 if err != nil { 428 return nil, err 429 } 430 431 opts, err := SnapServiceOptions(st, sn, allGrps) 432 if err != nil { 433 return nil, err 434 } 435 436 snapSvcMap[info] = opts 437 } 438 439 // TODO: the following lines should maybe be EnsureOptionsForDevice() or 440 // something since it is duplicated a few places 441 ensureOpts := &wrappers.EnsureSnapServicesOptions{ 442 Preseeding: snapdenv.Preseeding(), 443 } 444 445 // set RequireMountedSnapdSnap if we are on UC18+ only 446 deviceCtx, err := snapstate.DeviceCtx(st, nil, nil) 447 if err != nil { 448 return nil, err 449 } 450 451 if !deviceCtx.Classic() && deviceCtx.Model().Base() != "" { 452 ensureOpts.RequireMountedSnapdSnap = true 453 } 454 455 grpsToStart := []*quota.Group{} 456 appsToRestartBySnap = map[*snap.Info][]*snap.AppInfo{} 457 458 collectModifiedUnits := func(app *snap.AppInfo, grp *quota.Group, unitType string, name, old, new string) { 459 switch unitType { 460 case "slice": 461 // this slice was either modified or written for the first time 462 463 // There are currently 3 possible cases that have different 464 // operations required, but we ignore one of them, so there really 465 // are just 2 cases we care about: 466 // 1. If this slice was initially written, we just need to systemctl 467 // start it 468 // 2. If the slice was modified to be given more resources (i.e. a 469 // higher memory limit), then we just need to do a daemon-reload 470 // which causes systemd to modify the cgroup which will always 471 // work since a cgroup can be atomically given more resources 472 // without issue since the cgroup can't be using more than the 473 // current limit. 474 // 3. If the slice was modified to be given _less_ resources (i.e. a 475 // lower memory limit), then we need to stop the services before 476 // issuing the daemon-reload to systemd, then do the 477 // daemon-reload which will succeed in modifying the cgroup, then 478 // start the services we stopped back up again. This is because 479 // otherwise if the services are currently running and using more 480 // resources than they would be allowed after the modification is 481 // applied by systemd to the cgroup, the kernel responds with 482 // EBUSY, and it isn't clear if the modification is then properly 483 // in place or not. 484 // 485 // We will already have called daemon-reload at the end of 486 // EnsureSnapServices directly, so handling case 3 is difficult, and 487 // for now we disallow making this sort of change to a quota group, 488 // that logic is handled at a higher level than this function. 489 // Thus the only decision we really have to make is if the slice was 490 // newly written or not, and if it was save it for later 491 if old == "" { 492 grpsToStart = append(grpsToStart, grp) 493 } 494 495 case "service": 496 // in this case, the only way that a service could have been changed 497 // was if it was moved into or out of a slice, in both cases we need 498 // to restart the service 499 sn := app.Snap 500 appsToRestartBySnap[sn] = append(appsToRestartBySnap[sn], app) 501 502 // TODO: what about sockets and timers? activation units just start 503 // the full unit, so as long as the full unit is restarted we should 504 // be okay? 505 } 506 } 507 if err := wrappers.EnsureSnapServices(snapSvcMap, ensureOpts, collectModifiedUnits, meterLocked); err != nil { 508 return nil, err 509 } 510 511 if ensureOpts.Preseeding { 512 // nothing to restart 513 return nil, nil 514 } 515 516 // TODO: should this logic move to wrappers in wrappers.RemoveQuotaGroup()? 517 systemSysd := systemd.New(systemd.SystemMode, meterLocked) 518 519 // now start the slices 520 for _, grp := range grpsToStart { 521 // TODO: what should these timeouts for stopping/restart slices be? 522 if err := systemSysd.Start(grp.SliceFileName()); err != nil { 523 return nil, err 524 } 525 } 526 527 // after starting all the grps that we modified from EnsureSnapServices, 528 // we need to handle the case where a quota was removed, this will only 529 // happen one at a time and can be identified by the grp provided to us 530 // not existing in the state 531 if _, ok := allGrps[grp.Name]; !ok { 532 // stop the quota group, then remove it 533 if !ensureOpts.Preseeding { 534 if err := systemSysd.Stop(grp.SliceFileName(), 5*time.Second); err != nil { 535 logger.Noticef("unable to stop systemd slice while removing group %q: %v", grp.Name, err) 536 } 537 } 538 539 // TODO: this results in a second systemctl daemon-reload which is 540 // undesirable, we should figure out how to do this operation with a 541 // single daemon-reload 542 err := wrappers.RemoveQuotaGroup(grp, meterLocked) 543 if err != nil { 544 return nil, err 545 } 546 } 547 548 return appsToRestartBySnap, nil 549 } 550 551 // restartSnapServices is used to restart the services for each snap 552 // that was newly moved into a quota group iterate in a sorted order 553 // over the snaps to restart their apps for easy tests. 554 func restartSnapServices(st *state.State, t *state.Task, appsToRestartBySnap map[*snap.Info][]*snap.AppInfo, perfTimings *timings.Timings) error { 555 if len(appsToRestartBySnap) == 0 { 556 return nil 557 } 558 559 var meterUnlocked progress.Meter 560 if t == nil { 561 meterUnlocked = progress.Null 562 } else { 563 meterUnlocked = snapstate.NewTaskProgressAdapterUnlocked(t) 564 } 565 566 if perfTimings == nil { 567 perfTimings = &timings.Timings{} 568 } 569 570 st.Unlock() 571 defer st.Lock() 572 573 snaps := make([]*snap.Info, 0, len(appsToRestartBySnap)) 574 for sn := range appsToRestartBySnap { 575 snaps = append(snaps, sn) 576 } 577 578 sort.Slice(snaps, func(i, j int) bool { 579 return snaps[i].InstanceName() < snaps[j].InstanceName() 580 }) 581 582 for _, sn := range snaps { 583 startupOrdered, err := snap.SortServices(appsToRestartBySnap[sn]) 584 if err != nil { 585 return err 586 } 587 588 err = wrappers.RestartServices(startupOrdered, nil, nil, meterUnlocked, perfTimings) 589 if err != nil { 590 return err 591 } 592 } 593 return nil 594 } 595 596 // ensureSnapServicesStateForGroup combines ensureSnapServicesForGroup and restartSnapServices 597 func ensureSnapServicesStateForGroup(st *state.State, grp *quota.Group, opts *ensureSnapServicesForGroupOptions) error { 598 appsToRestartBySnap, err := ensureSnapServicesForGroup(st, nil, grp, opts) 599 if err != nil { 600 return err 601 } 602 return restartSnapServices(st, nil, appsToRestartBySnap, nil) 603 } 604 605 func validateSnapForAddingToGroup(st *state.State, snaps []string, group string, allGrps map[string]*quota.Group) error { 606 for _, name := range snaps { 607 // validate that the snap exists 608 _, err := snapstate.CurrentInfo(st, name) 609 if err != nil { 610 return fmt.Errorf("cannot use snap %q in group %q: %v", name, group, err) 611 } 612 613 // check that the snap is not already in a group 614 for _, grp := range allGrps { 615 if strutil.ListContains(grp.Snaps, name) { 616 return fmt.Errorf("cannot add snap %q to group %q: snap already in quota group %q", name, group, grp.Name) 617 } 618 } 619 } 620 621 return nil 622 } 623 624 func quotaControlAffectedSnaps(t *state.Task) (snaps []string, err error) { 625 qcs := []QuotaControlAction{} 626 if err := t.Get("quota-control-actions", &qcs); err != nil { 627 return nil, fmt.Errorf("internal error: cannot get quota-control-actions: %v", err) 628 } 629 630 // if state-updated was already set we can use it 631 var updated quotaStateUpdated 632 if err := t.Get("state-updated", &updated); err != state.ErrNoState { 633 if err != nil { 634 return nil, err 635 } 636 // TODO: consider boot-id as well? 637 for snapName := range updated.AppsToRestartBySnap { 638 snaps = append(snaps, snapName) 639 } 640 // all set 641 return snaps, nil 642 } 643 644 st := t.State() 645 for _, qc := range qcs { 646 switch qc.Action { 647 case "remove": 648 // the snaps affected by a remove are implicitly 649 // the ones currently in the quota group 650 grp, err := GetQuota(st, qc.QuotaName) 651 if err != nil && err != ErrQuotaNotFound { 652 return nil, err 653 } 654 if err == nil { 655 snaps = append(snaps, grp.Snaps...) 656 } 657 default: 658 // create and update affects only the snaps 659 // explicitly mentioned 660 // TODO: this will cease to be true 661 // if we support reparenting or orphaning 662 // of quota groups 663 snaps = append(snaps, qc.AddSnaps...) 664 } 665 } 666 return snaps, nil 667 }