gopkg.in/ubuntu-core/snappy.v0@v0.0.0-20210902073436-25a8614f10a6/overlord/servicestate/servicemgr.go (about) 1 // -*- Mode: Go; indent-tabs-mode: t -*- 2 3 /* 4 * Copyright (C) 2021 Canonical Ltd 5 * 6 * This program is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 3 as 8 * published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 * 18 */ 19 20 package servicestate 21 22 import ( 23 "fmt" 24 "os" 25 "os/exec" 26 "path/filepath" 27 "strings" 28 "time" 29 30 "github.com/snapcore/snapd/dirs" 31 "github.com/snapcore/snapd/logger" 32 "github.com/snapcore/snapd/osutil" 33 "github.com/snapcore/snapd/overlord/snapstate" 34 "github.com/snapcore/snapd/overlord/state" 35 "github.com/snapcore/snapd/progress" 36 "github.com/snapcore/snapd/snap" 37 "github.com/snapcore/snapd/snap/quota" 38 "github.com/snapcore/snapd/snapdenv" 39 "github.com/snapcore/snapd/systemd" 40 "github.com/snapcore/snapd/timings" 41 "github.com/snapcore/snapd/wrappers" 42 ) 43 44 // ServiceManager is responsible for starting and stopping snap services. 45 type ServiceManager struct { 46 state *state.State 47 48 ensuredSnapSvcs bool 49 } 50 51 // Manager returns a new service manager. 52 func Manager(st *state.State, runner *state.TaskRunner) *ServiceManager { 53 delayedCrossMgrInit() 54 m := &ServiceManager{ 55 state: st, 56 } 57 // TODO: undo handler 58 runner.AddHandler("service-control", m.doServiceControl, nil) 59 60 // TODO: undo handler 61 runner.AddHandler("quota-control", m.doQuotaControl, nil) 62 63 snapstate.AddAffectedSnapsByKind("quota-control", quotaControlAffectedSnaps) 64 65 return m 66 } 67 68 func MockEnsuredSnapServices(mgr *ServiceManager, ensured bool) (restore func()) { 69 osutil.MustBeTestBinary("ensured snap services can only be mocked from tests") 70 old := mgr.ensuredSnapSvcs 71 mgr.ensuredSnapSvcs = ensured 72 return func() { 73 mgr.ensuredSnapSvcs = old 74 } 75 } 76 77 func (m *ServiceManager) ensureSnapServicesUpdated() (err error) { 78 m.state.Lock() 79 defer m.state.Unlock() 80 if m.ensuredSnapSvcs { 81 return nil 82 } 83 84 // only run after we are seeded 85 var seeded bool 86 err = m.state.Get("seeded", &seeded) 87 if err != nil && err != state.ErrNoState { 88 return err 89 } 90 if !seeded { 91 return nil 92 } 93 94 // we are seeded, now we need to find all snap services and re-generate 95 // services as necessary 96 97 // ensure all snap services are updated 98 allStates, err := snapstate.All(m.state) 99 if err != nil && err != state.ErrNoState { 100 return err 101 } 102 103 // if we have no snaps we can exit early 104 if len(allStates) == 0 { 105 m.ensuredSnapSvcs = true 106 return nil 107 } 108 109 allGrps, err := AllQuotas(m.state) 110 if err != nil && err != state.ErrNoState { 111 return err 112 } 113 114 snapsMap := map[*snap.Info]*wrappers.SnapServiceOptions{} 115 116 for _, snapSt := range allStates { 117 info, err := snapSt.CurrentInfo() 118 if err != nil { 119 return err 120 } 121 122 // don't use EnsureSnapServices with the snapd snap 123 if info.Type() == snap.TypeSnapd { 124 continue 125 } 126 127 // use the cached copy of all quota groups 128 snapSvcOpts, err := SnapServiceOptions(m.state, info.InstanceName(), allGrps) 129 if err != nil { 130 return err 131 } 132 snapsMap[info] = snapSvcOpts 133 } 134 135 // setup ensure options 136 ensureOpts := &wrappers.EnsureSnapServicesOptions{ 137 Preseeding: snapdenv.Preseeding(), 138 } 139 140 // set RequireMountedSnapdSnap if we are on UC18+ only 141 deviceCtx, err := snapstate.DeviceCtx(m.state, nil, nil) 142 if err != nil { 143 return err 144 } 145 146 if !deviceCtx.Classic() && deviceCtx.Model().Base() != "" { 147 ensureOpts.RequireMountedSnapdSnap = true 148 } 149 150 rewrittenServices := make(map[*snap.Info][]*snap.AppInfo) 151 serviceKillingMightHaveOccurred := false 152 observeChange := func(app *snap.AppInfo, _ *quota.Group, unitType, name string, old, new string) { 153 if unitType == "service" { 154 rewrittenServices[app.Snap] = append(rewrittenServices[app.Snap], app) 155 if !serviceKillingMightHaveOccurred { 156 if strings.Contains(old, "\nRequires=usr-lib-snapd.mount\n") { 157 serviceKillingMightHaveOccurred = true 158 } 159 } 160 } 161 } 162 163 err = wrappers.EnsureSnapServices(snapsMap, ensureOpts, observeChange, progress.Null) 164 if err != nil { 165 return err 166 } 167 168 // if nothing was modified or we are not on UC18+, we are done 169 if len(rewrittenServices) == 0 || deviceCtx.Classic() || deviceCtx.Model().Base() == "" || !serviceKillingMightHaveOccurred { 170 m.ensuredSnapSvcs = true 171 return nil 172 } 173 174 // otherwise, we know now that we have rewritten some snap services, we need 175 // to handle the case of LP #1924805, and restart any services that were 176 // accidentally killed when we refreshed snapd 177 if err := restartServicesKilledInSnapdSnapRefresh(rewrittenServices); err != nil { 178 // we failed to restart services that were killed by a snapd refresh, so 179 // we need to immediately reboot in the hopes that this restores 180 // services to a functioning state 181 182 m.state.RequestRestart(state.RestartSystemNow) 183 return fmt.Errorf("error trying to restart killed services, immediately rebooting: %v", err) 184 } 185 186 m.ensuredSnapSvcs = true 187 188 return nil 189 } 190 191 // Ensure implements StateManager.Ensure. 192 func (m *ServiceManager) Ensure() error { 193 if err := m.ensureSnapServicesUpdated(); err != nil { 194 return err 195 } 196 return nil 197 } 198 199 func delayedCrossMgrInit() { 200 // hook into conflict checks mechanisms 201 snapstate.AddAffectedSnapsByAttr("service-action", serviceControlAffectedSnaps) 202 snapstate.SnapServiceOptions = SnapServiceOptions 203 snapstate.EnsureSnapAbsentFromQuotaGroup = EnsureSnapAbsentFromQuota 204 } 205 206 func serviceControlAffectedSnaps(t *state.Task) ([]string, error) { 207 var serviceAction ServiceAction 208 if err := t.Get("service-action", &serviceAction); err != nil { 209 return nil, fmt.Errorf("internal error: cannot obtain service action from task: %s", t.Summary()) 210 } 211 return []string{serviceAction.SnapName}, nil 212 } 213 214 func getBootTime() (time.Time, error) { 215 cmd := exec.Command("uptime", "-s") 216 cmd.Env = append(cmd.Env, "TZ=UTC") 217 out, err := cmd.CombinedOutput() 218 if err != nil { 219 return time.Time{}, osutil.OutputErr(out, err) 220 } 221 222 // parse the output from the command as a time 223 t, err := time.ParseInLocation("2006-01-02 15:04:05", strings.TrimSpace(string(out)), time.UTC) 224 if err != nil { 225 return time.Time{}, err 226 } 227 228 return t, nil 229 } 230 231 func restartServicesKilledInSnapdSnapRefresh(modified map[*snap.Info][]*snap.AppInfo) error { 232 // we decide on which services to restart by identifying (out of the set of 233 // services we just modified) services that were stopped after 234 // usr-lib-snapd.mount was written, but before usr-lib-snapd.mount was last 235 // stopped - this is the time window in which snapd (accidentally) killed 236 // all snap services using Requires=, see LP #1924805 for full details, so 237 // we need to undo that by restarting those snaps 238 239 st, err := os.Stat(filepath.Join(dirs.SnapServicesDir, wrappers.SnapdToolingMountUnit)) 240 if err != nil { 241 return err 242 } 243 244 // always truncate all times to second precision, since that is the least 245 // precise time we have of all the times we consider, due to using systemctl 246 // for getting the InactiveEnterTimestamp for systemd units 247 // TODO: we should switch back to using D-Bus for this, where we get much 248 // more accurate times, down to the microsecond, which is the same precision 249 // we have for the modification time here, and thus we can more easily avoid 250 // the truncation issue, and we can ensure that we are minimizing the risk 251 // of inadvertently starting services that just so happened to have been 252 // stopped in the same second that we modified and usr-lib-snapd.mount. 253 lowerTimeBound := st.ModTime().Truncate(time.Second) 254 255 // if the time that the usr-lib-snapd.mount was modified is before the time 256 // that this device was booted up, then we can skip this since we know we 257 // that a refresh is not being performed 258 bootTime, err := getBootTime() 259 if err != nil { 260 // don't fail if we can't get the boot time, if we don't get it the 261 // below check will be always false (no time can be before zero time) 262 logger.Noticef("error getting boot time: %v", err) 263 } 264 265 if lowerTimeBound.Before(bootTime) { 266 return nil 267 } 268 269 // Get the InactiveEnterTimestamp property for the usr-lib-snapd.mount unit, 270 // this is the time that usr-lib-snapd.mount was transitioned from 271 // deactivating to inactive and was done being started. This is the correct 272 // upper bound for our window in which systemd killed snap services because 273 // systemd orders the transactions when we stop usr-lib-snapd.mount thusly: 274 // 275 // 1. Find all units which have Requires=usr-lib-snapd.mount (all snap 276 // services which would have been refreshed during snapd 2.49.2) 277 // 2. Stop all such services found in 1. 278 // 3. Stop usr-lib-snapd.mount itself. 279 // 280 // Thus the time after all the services were killed is given by the time 281 // that systemd transitioned usr-lib-snapd.mount to inactive, which is given 282 // by InactiveEnterTimestamp. 283 284 // TODO: pass a real interactor here? 285 sysd := systemd.New(systemd.SystemMode, progress.Null) 286 287 upperTimeBound, err := sysd.InactiveEnterTimestamp(wrappers.SnapdToolingMountUnit) 288 if err != nil { 289 return err 290 } 291 292 if upperTimeBound.IsZero() { 293 // this means that the usr-lib-snapd.mount unit never exited during this 294 // boot, which means we are done in this ensure because the bug we care 295 // about (LP #1924805) here was never triggered 296 return nil 297 } 298 299 upperTimeBound = upperTimeBound.Truncate(time.Second) 300 301 // if the lower time bound is ever in the future past the upperTimeBound, 302 // then just use the upperTimeBound as both limits, since we know that the 303 // upper bound and the time for each service being stopped are of the same 304 // precision 305 if lowerTimeBound.After(upperTimeBound) { 306 lowerTimeBound = upperTimeBound 307 } 308 309 candidateAppsToRestartBySnap := make(map[*snap.Info][]*snap.AppInfo) 310 311 for sn, apps := range modified { 312 for _, app := range apps { 313 // get the InactiveEnterTimestamp for the service 314 t, err := sysd.InactiveEnterTimestamp(app.ServiceName()) 315 if err != nil { 316 return err 317 } 318 319 // always truncate to second precision 320 t = t.Truncate(time.Second) 321 322 // check if this unit entered the inactive state between the time 323 // range, but be careful about time precision here, we want an 324 // inclusive range i.e. [lower,upper] not (lower,upper) in case the 325 // time that systemd saves these events as is imprecise or slow and 326 // things get saved as having happened at the exact same time 327 if !t.Before(lowerTimeBound) && !t.After(upperTimeBound) { 328 candidateAppsToRestartBySnap[sn] = append(candidateAppsToRestartBySnap[sn], app) 329 } 330 } 331 } 332 333 // Second loop actually restarts the services per-snap by sorting them and 334 // removing disabled services. Note that we could have disabled services 335 // here because a service could have been running, but disabled when snapd 336 // was refreshed, hence it got killed, but we don't want to restart it, 337 // since it is disabled, and so that disabled running service is just SOL. 338 for sn, apps := range candidateAppsToRestartBySnap { 339 // TODO: should we try to start as many services as possible here before 340 // giving up given the severity of the bug? 341 disabledSvcs, err := wrappers.QueryDisabledServices(sn, progress.Null) 342 if err != nil { 343 return err 344 } 345 346 startupOrdered, err := snap.SortServices(apps) 347 if err != nil { 348 return err 349 } 350 351 // TODO: what to do about timings here? 352 nullPerfTimings := &timings.Timings{} 353 if err := wrappers.StartServices(startupOrdered, disabledSvcs, nil, progress.Null, nullPerfTimings); err != nil { 354 return err 355 } 356 } 357 358 return nil 359 }