github.com/kubiko/snapd@v0.0.0-20201013125620-d4f3094d9ddf/overlord/snapstate/autorefresh.go (about) 1 // -*- Mode: Go; indent-tabs-mode: t -*- 2 3 /* 4 * Copyright (C) 2017-2020 Canonical Ltd 5 * 6 * This program is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 3 as 8 * published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 * 18 */ 19 20 package snapstate 21 22 import ( 23 "fmt" 24 "os" 25 "time" 26 27 "github.com/snapcore/snapd/httputil" 28 "github.com/snapcore/snapd/i18n" 29 "github.com/snapcore/snapd/logger" 30 "github.com/snapcore/snapd/overlord/auth" 31 "github.com/snapcore/snapd/overlord/configstate/config" 32 "github.com/snapcore/snapd/overlord/state" 33 "github.com/snapcore/snapd/release" 34 "github.com/snapcore/snapd/snap" 35 "github.com/snapcore/snapd/strutil" 36 "github.com/snapcore/snapd/timeutil" 37 "github.com/snapcore/snapd/timings" 38 ) 39 40 // the default refresh pattern 41 const defaultRefreshSchedule = "00:00~24:00/4" 42 43 // cannot keep without refreshing for more than maxPostponement 44 const maxPostponement = 60 * 24 * time.Hour 45 46 // cannot inhibit refreshes for more than maxInhibition 47 const maxInhibition = 14 * 24 * time.Hour 48 49 // hooks setup by devicestate 50 var ( 51 CanAutoRefresh func(st *state.State) (bool, error) 52 CanManageRefreshes func(st *state.State) bool 53 IsOnMeteredConnection func() (bool, error) 54 ) 55 56 // refreshRetryDelay specified the minimum time to retry failed refreshes 57 var refreshRetryDelay = 20 * time.Minute 58 59 // autoRefresh will ensure that snaps are refreshed automatically 60 // according to the refresh schedule. 61 type autoRefresh struct { 62 state *state.State 63 64 lastRefreshSchedule string 65 nextRefresh time.Time 66 lastRefreshAttempt time.Time 67 managedDeniedLogged bool 68 } 69 70 func newAutoRefresh(st *state.State) *autoRefresh { 71 return &autoRefresh{ 72 state: st, 73 } 74 } 75 76 // RefreshSchedule will return a user visible string with the current schedule 77 // for the automatic refreshes and a flag indicating whether the schedule is a 78 // legacy one. 79 func (m *autoRefresh) RefreshSchedule() (schedule string, legacy bool, err error) { 80 _, schedule, legacy, err = m.refreshScheduleWithDefaultsFallback() 81 return schedule, legacy, err 82 } 83 84 // NextRefresh returns when the next automatic refresh will happen. 85 func (m *autoRefresh) NextRefresh() time.Time { 86 return m.nextRefresh 87 } 88 89 // LastRefresh returns when the last refresh happened. 90 func (m *autoRefresh) LastRefresh() (time.Time, error) { 91 return getTime(m.state, "last-refresh") 92 } 93 94 // EffectiveRefreshHold returns the time until to which refreshes are 95 // held if refresh.hold configuration is set and accounting for the 96 // max postponement since the last refresh. 97 func (m *autoRefresh) EffectiveRefreshHold() (time.Time, error) { 98 var holdTime time.Time 99 100 tr := config.NewTransaction(m.state) 101 err := tr.Get("core", "refresh.hold", &holdTime) 102 if err != nil && !config.IsNoOption(err) { 103 return time.Time{}, err 104 } 105 106 // cannot hold beyond last-refresh + max-postponement 107 lastRefresh, err := m.LastRefresh() 108 if err != nil { 109 return time.Time{}, err 110 } 111 if lastRefresh.IsZero() { 112 seedTime, err := getTime(m.state, "seed-time") 113 if err != nil { 114 return time.Time{}, err 115 } 116 if seedTime.IsZero() { 117 // no reference to know whether holding is reasonable 118 return time.Time{}, nil 119 } 120 lastRefresh = seedTime 121 } 122 123 limitTime := lastRefresh.Add(maxPostponement) 124 if holdTime.After(limitTime) { 125 return limitTime, nil 126 } 127 128 return holdTime, nil 129 } 130 131 // clearRefreshHold clears refresh.hold configuration. 132 func (m *autoRefresh) clearRefreshHold() { 133 tr := config.NewTransaction(m.state) 134 tr.Set("core", "refresh.hold", nil) 135 tr.Commit() 136 } 137 138 // AtSeed configures refresh policies at end of seeding. 139 func (m *autoRefresh) AtSeed() error { 140 // on classic hold refreshes for 2h after seeding 141 if release.OnClassic { 142 var t1 time.Time 143 tr := config.NewTransaction(m.state) 144 err := tr.Get("core", "refresh.hold", &t1) 145 if !config.IsNoOption(err) { 146 // already set or error 147 return err 148 } 149 // TODO: have a policy that if the snapd exe itself 150 // is older than X weeks/months we skip the holding? 151 now := time.Now().UTC() 152 tr.Set("core", "refresh.hold", now.Add(2*time.Hour)) 153 tr.Commit() 154 m.nextRefresh = now 155 } 156 return nil 157 } 158 159 func canRefreshOnMeteredConnection(st *state.State) (bool, error) { 160 tr := config.NewTransaction(st) 161 var onMetered string 162 err := tr.GetMaybe("core", "refresh.metered", &onMetered) 163 if err != nil && err != state.ErrNoState { 164 return false, err 165 } 166 167 return onMetered != "hold", nil 168 } 169 170 func (m *autoRefresh) canRefreshRespectingMetered(now, lastRefresh time.Time) (can bool, err error) { 171 can, err = canRefreshOnMeteredConnection(m.state) 172 if err != nil { 173 return false, err 174 } 175 if can { 176 return true, nil 177 } 178 179 // ignore any errors that occurred while checking if we are on a metered 180 // connection 181 metered, _ := IsOnMeteredConnection() 182 if !metered { 183 return true, nil 184 } 185 186 if now.Sub(lastRefresh) >= maxPostponement { 187 // TODO use warnings when the infra becomes available 188 logger.Noticef("Auto refresh disabled while on metered connections, but pending for too long (%d days). Trying to refresh now.", int(maxPostponement.Hours()/24)) 189 return true, nil 190 } 191 192 logger.Debugf("Auto refresh disabled on metered connections") 193 194 return false, nil 195 } 196 197 // Ensure ensures that we refresh all installed snaps periodically 198 func (m *autoRefresh) Ensure() error { 199 m.state.Lock() 200 defer m.state.Unlock() 201 202 // see if it even makes sense to try to refresh 203 if CanAutoRefresh == nil { 204 return nil 205 } 206 if ok, err := CanAutoRefresh(m.state); err != nil || !ok { 207 return err 208 } 209 210 // get lastRefresh and schedule 211 lastRefresh, err := m.LastRefresh() 212 if err != nil { 213 return err 214 } 215 216 refreshSchedule, refreshScheduleStr, _, err := m.refreshScheduleWithDefaultsFallback() 217 if err != nil { 218 return err 219 } 220 if len(refreshSchedule) == 0 { 221 m.nextRefresh = time.Time{} 222 return nil 223 } 224 // we already have a refresh time, check if we got a new config 225 if !m.nextRefresh.IsZero() { 226 if m.lastRefreshSchedule != refreshScheduleStr { 227 // the refresh schedule has changed 228 logger.Debugf("Refresh timer changed.") 229 m.nextRefresh = time.Time{} 230 } 231 } 232 m.lastRefreshSchedule = refreshScheduleStr 233 234 // ensure nothing is in flight already 235 if autoRefreshInFlight(m.state) { 236 return nil 237 } 238 239 now := time.Now() 240 // compute next refresh attempt time (if needed) 241 if m.nextRefresh.IsZero() { 242 // store attempts in memory so that we can backoff 243 if !lastRefresh.IsZero() { 244 delta := timeutil.Next(refreshSchedule, lastRefresh, maxPostponement) 245 now = time.Now() 246 m.nextRefresh = now.Add(delta) 247 } else { 248 // make sure either seed-time or last-refresh 249 // are set for hold code below 250 m.ensureLastRefreshAnchor() 251 // immediate 252 m.nextRefresh = now 253 } 254 logger.Debugf("Next refresh scheduled for %s.", m.nextRefresh.Format(time.RFC3339)) 255 } 256 257 held, holdTime, err := m.isRefreshHeld(refreshSchedule) 258 if err != nil { 259 return err 260 } 261 262 // do refresh attempt (if needed) 263 if !held { 264 if !holdTime.IsZero() { 265 // expired hold case 266 m.clearRefreshHold() 267 if m.nextRefresh.Before(holdTime) { 268 // next refresh is obsolete, compute the next one 269 delta := timeutil.Next(refreshSchedule, holdTime, maxPostponement) 270 now = time.Now() 271 m.nextRefresh = now.Add(delta) 272 } 273 } 274 275 // refresh is also "held" if the next time is in the future 276 // note that the two times here could be exactly equal, so we use 277 // !After() because that is true in the case that the next refresh is 278 // before now, and the next refresh is equal to now without requiring an 279 // or operation 280 if !m.nextRefresh.After(now) { 281 var can bool 282 can, err = m.canRefreshRespectingMetered(now, lastRefresh) 283 if err != nil { 284 return err 285 } 286 if !can { 287 // clear nextRefresh so that another refresh time is calculated 288 m.nextRefresh = time.Time{} 289 return nil 290 } 291 292 // Check that we have reasonable delays between attempts. 293 // If the store is under stress we need to make sure we do not 294 // hammer it too often 295 if !m.lastRefreshAttempt.IsZero() && m.lastRefreshAttempt.Add(refreshRetryDelay).After(time.Now()) { 296 return nil 297 } 298 299 err = m.launchAutoRefresh(refreshSchedule) 300 if _, ok := err.(*httputil.PersistentNetworkError); !ok { 301 m.nextRefresh = time.Time{} 302 } // else - refresh will be retried after refreshRetryDelay 303 } 304 } 305 306 return err 307 } 308 309 // isRefreshHeld returns whether an auto-refresh is currently held back or not, 310 // as indicated by m.EffectiveRefreshHold(). 311 func (m *autoRefresh) isRefreshHeld(refreshSchedule []*timeutil.Schedule) (bool, time.Time, error) { 312 now := time.Now() 313 // should we hold back refreshes? 314 holdTime, err := m.EffectiveRefreshHold() 315 if err != nil { 316 return false, time.Time{}, err 317 } 318 if holdTime.After(now) { 319 return true, holdTime, nil 320 } 321 322 return false, holdTime, nil 323 } 324 325 func (m *autoRefresh) ensureLastRefreshAnchor() { 326 seedTime, _ := getTime(m.state, "seed-time") 327 if !seedTime.IsZero() { 328 return 329 } 330 331 // last core refresh 332 coreRefreshDate := snap.InstallDate("core") 333 if !coreRefreshDate.IsZero() { 334 m.state.Set("last-refresh", coreRefreshDate) 335 return 336 } 337 338 // fallback to executable time 339 st, err := os.Stat("/proc/self/exe") 340 if err == nil { 341 m.state.Set("last-refresh", st.ModTime()) 342 return 343 } 344 } 345 346 // refreshScheduleWithDefaultsFallback returns the current refresh schedule 347 // and refresh string. When an invalid refresh schedule is set by the user 348 // the refresh schedule is automatically reset to the default. 349 // 350 // TODO: we can remove the refreshSchedule reset because we have validation 351 // of the schedule now. 352 func (m *autoRefresh) refreshScheduleWithDefaultsFallback() (ts []*timeutil.Schedule, scheduleAsStr string, legacy bool, err error) { 353 managed, requested, legacy := refreshScheduleManaged(m.state) 354 if managed { 355 if m.lastRefreshSchedule != "managed" { 356 logger.Noticef("refresh is managed via the snapd-control interface") 357 m.lastRefreshSchedule = "managed" 358 } 359 m.managedDeniedLogged = false 360 return nil, "managed", legacy, nil 361 } else if requested { 362 // managed refresh schedule was denied 363 if !m.managedDeniedLogged { 364 logger.Noticef("managed refresh schedule denied, no properly configured snapd-control") 365 m.managedDeniedLogged = true 366 } 367 // fallback to default schedule 368 return refreshScheduleDefault() 369 } else { 370 m.managedDeniedLogged = false 371 } 372 373 tr := config.NewTransaction(m.state) 374 // try the new refresh.timer config option first 375 err = tr.Get("core", "refresh.timer", &scheduleAsStr) 376 if err != nil && !config.IsNoOption(err) { 377 return nil, "", false, err 378 } 379 if scheduleAsStr != "" { 380 ts, err = timeutil.ParseSchedule(scheduleAsStr) 381 if err != nil { 382 logger.Noticef("cannot use refresh.timer configuration: %s", err) 383 return refreshScheduleDefault() 384 } 385 return ts, scheduleAsStr, false, nil 386 } 387 388 // fallback to legacy refresh.schedule setting when the new 389 // config option is not set 390 err = tr.Get("core", "refresh.schedule", &scheduleAsStr) 391 if err != nil && !config.IsNoOption(err) { 392 return nil, "", false, err 393 } 394 if scheduleAsStr != "" { 395 ts, err = timeutil.ParseLegacySchedule(scheduleAsStr) 396 if err != nil { 397 logger.Noticef("cannot use refresh.schedule configuration: %s", err) 398 return refreshScheduleDefault() 399 } 400 return ts, scheduleAsStr, true, nil 401 } 402 403 return refreshScheduleDefault() 404 } 405 406 // launchAutoRefresh creates the auto-refresh taskset and a change for it. 407 func (m *autoRefresh) launchAutoRefresh(refreshSchedule []*timeutil.Schedule) error { 408 perfTimings := timings.New(map[string]string{"ensure": "auto-refresh"}) 409 tm := perfTimings.StartSpan("auto-refresh", "query store and setup auto-refresh change") 410 defer func() { 411 tm.Stop() 412 perfTimings.Save(m.state) 413 }() 414 415 m.lastRefreshAttempt = time.Now() 416 417 // NOTE: this will unlock and re-lock state for network ops 418 updated, tasksets, err := AutoRefresh(auth.EnsureContextTODO(), m.state) 419 420 // TODO: we should have some way to lock just creating and starting changes, 421 // as that would alleviate this race condition we are guarding against 422 // with this check and probably would eliminate other similar race 423 // conditions elsewhere 424 425 // re-check if the refresh is held because it could have been re-held and 426 // pushed back, in which case we need to abort the auto-refresh and wait 427 held, _, holdErr := m.isRefreshHeld(refreshSchedule) 428 if holdErr != nil { 429 return holdErr 430 } 431 432 if held { 433 // then a request came in that pushed the refresh out, so we will need 434 // to try again later 435 logger.Noticef("Auto-refresh was delayed mid-way through launching, aborting to try again later") 436 return nil 437 } 438 439 if _, ok := err.(*httputil.PersistentNetworkError); ok { 440 logger.Noticef("Cannot prepare auto-refresh change due to a permanent network error: %s", err) 441 return err 442 } 443 m.state.Set("last-refresh", time.Now()) 444 if err != nil { 445 logger.Noticef("Cannot prepare auto-refresh change: %s", err) 446 return err 447 } 448 449 var msg string 450 switch len(updated) { 451 case 0: 452 logger.Noticef(i18n.G("auto-refresh: all snaps are up-to-date")) 453 return nil 454 case 1: 455 msg = fmt.Sprintf(i18n.G("Auto-refresh snap %q"), updated[0]) 456 case 2, 3: 457 quoted := strutil.Quoted(updated) 458 // TRANSLATORS: the %s is a comma-separated list of quoted snap names 459 msg = fmt.Sprintf(i18n.G("Auto-refresh snaps %s"), quoted) 460 default: 461 msg = fmt.Sprintf(i18n.G("Auto-refresh %d snaps"), len(updated)) 462 } 463 464 chg := m.state.NewChange("auto-refresh", msg) 465 for _, ts := range tasksets { 466 chg.AddAll(ts) 467 } 468 chg.Set("snap-names", updated) 469 chg.Set("api-data", map[string]interface{}{"snap-names": updated}) 470 state.TagTimingsWithChange(perfTimings, chg) 471 472 return nil 473 } 474 475 func refreshScheduleDefault() (ts []*timeutil.Schedule, scheduleStr string, legacy bool, err error) { 476 refreshSchedule, err := timeutil.ParseSchedule(defaultRefreshSchedule) 477 if err != nil { 478 panic(fmt.Sprintf("defaultRefreshSchedule cannot be parsed: %s", err)) 479 } 480 481 return refreshSchedule, defaultRefreshSchedule, false, nil 482 } 483 484 func autoRefreshInFlight(st *state.State) bool { 485 for _, chg := range st.Changes() { 486 if chg.Kind() == "auto-refresh" && !chg.Status().Ready() { 487 return true 488 } 489 } 490 return false 491 } 492 493 // refreshScheduleManaged returns true if the refresh schedule of the 494 // device is managed by an external snap 495 func refreshScheduleManaged(st *state.State) (managed, requested, legacy bool) { 496 var confStr string 497 498 // this will only be "nil" if running in tests 499 if CanManageRefreshes == nil { 500 return false, false, legacy 501 } 502 503 // check new style timer first 504 tr := config.NewTransaction(st) 505 err := tr.Get("core", "refresh.timer", &confStr) 506 if err != nil && !config.IsNoOption(err) { 507 return false, false, legacy 508 } 509 // if not set, fallback to refresh.schedule 510 if confStr == "" { 511 if err := tr.Get("core", "refresh.schedule", &confStr); err != nil { 512 return false, false, legacy 513 } 514 legacy = true 515 } 516 517 if confStr != "managed" { 518 return false, false, legacy 519 } 520 521 return CanManageRefreshes(st), true, legacy 522 } 523 524 // getTime retrieves a time from a state value. 525 func getTime(st *state.State, timeKey string) (time.Time, error) { 526 var t1 time.Time 527 err := st.Get(timeKey, &t1) 528 if err != nil && err != state.ErrNoState { 529 return time.Time{}, err 530 } 531 return t1, nil 532 } 533 534 // inhibitRefresh returns an error if refresh is inhibited by running apps. 535 // 536 // Internally the snap state is updated to remember when the inhibition first 537 // took place. Apps can inhibit refreshes for up to "maxInhibition", beyond 538 // that period the refresh will go ahead despite application activity. 539 func inhibitRefresh(st *state.State, snapst *SnapState, info *snap.Info, checker func(*snap.Info) error) error { 540 if err := checker(info); err != nil { 541 days := int(maxInhibition.Truncate(time.Hour).Hours() / 24) 542 now := time.Now() 543 if snapst.RefreshInhibitedTime == nil { 544 // Store the instant when the snap was first inhibited. 545 // This is reset to nil on successful refresh. 546 snapst.RefreshInhibitedTime = &now 547 Set(st, info.InstanceName(), snapst) 548 if _, ok := err.(*BusySnapError); ok { 549 st.Warnf(i18n.NG( 550 "snap %q is currently in use. Its refresh will be postponed for up to %d day to wait for the snap to no longer be in use.", 551 "snap %q is currently in use. Its refresh will be postponed for up to %d days to wait for the snap to no longer be in use.", days), 552 info.SnapName(), days) 553 } 554 return err 555 } 556 557 if now.Sub(*snapst.RefreshInhibitedTime) < maxInhibition { 558 // If we are still in the allowed window then just return 559 // the error but don't change the snap state again. 560 return err 561 } 562 if _, ok := err.(*BusySnapError); ok { 563 st.Warnf(i18n.NG( 564 "snap %q has been running for the maximum allowable %d day since its refresh was postponed. It will now be refreshed.", 565 "snap %q has been running for the maximum allowable %d days since its refresh was postponed. It will now be refreshed.", days), 566 info.SnapName(), days) 567 } 568 } 569 return nil 570 }