github.com/meulengracht/snapd@v0.0.0-20210719210640-8bde69bcc84e/overlord/snapstate/autorefresh.go (about) 1 // -*- Mode: Go; indent-tabs-mode: t -*- 2 3 /* 4 * Copyright (C) 2017-2020 Canonical Ltd 5 * 6 * This program is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 3 as 8 * published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 * 18 */ 19 20 package snapstate 21 22 import ( 23 "context" 24 "fmt" 25 "os" 26 "time" 27 28 "github.com/snapcore/snapd/httputil" 29 "github.com/snapcore/snapd/i18n" 30 "github.com/snapcore/snapd/logger" 31 "github.com/snapcore/snapd/overlord/auth" 32 "github.com/snapcore/snapd/overlord/configstate/config" 33 "github.com/snapcore/snapd/overlord/state" 34 "github.com/snapcore/snapd/release" 35 "github.com/snapcore/snapd/snap" 36 "github.com/snapcore/snapd/strutil" 37 "github.com/snapcore/snapd/timeutil" 38 "github.com/snapcore/snapd/timings" 39 userclient "github.com/snapcore/snapd/usersession/client" 40 ) 41 42 // the default refresh pattern 43 const defaultRefreshSchedule = "00:00~24:00/4" 44 45 // cannot keep without refreshing for more than maxPostponement 46 const maxPostponement = 95 * 24 * time.Hour 47 48 // buffer for maxPostponement when holding snaps with auto-refresh gating 49 const maxPostponementBuffer = 5 * 24 * time.Hour 50 51 // cannot inhibit refreshes for more than maxInhibition 52 const maxInhibition = 14 * 24 * time.Hour 53 54 // hooks setup by devicestate 55 var ( 56 CanAutoRefresh func(st *state.State) (bool, error) 57 CanManageRefreshes func(st *state.State) bool 58 IsOnMeteredConnection func() (bool, error) 59 ) 60 61 // refreshRetryDelay specified the minimum time to retry failed refreshes 62 var refreshRetryDelay = 20 * time.Minute 63 64 // refreshCandidate carries information about a single snap to update as part 65 // of auto-refresh. 66 type refreshCandidate struct { 67 SnapSetup 68 Version string `json:"version,omitempty"` 69 } 70 71 func (rc *refreshCandidate) Type() snap.Type { 72 return rc.SnapSetup.Type 73 } 74 75 func (rc *refreshCandidate) SnapBase() string { 76 return rc.SnapSetup.Base 77 } 78 79 func (rc *refreshCandidate) DownloadSize() int64 { 80 return rc.DownloadInfo.Size 81 } 82 83 func (rc *refreshCandidate) InstanceName() string { 84 return rc.SnapSetup.InstanceName() 85 } 86 87 func (rc *refreshCandidate) Prereq(st *state.State) []string { 88 return rc.SnapSetup.Prereq 89 } 90 91 func (rc *refreshCandidate) SnapSetupForUpdate(st *state.State, params updateParamsFunc, userID int, globalFlags *Flags) (*SnapSetup, *SnapState, error) { 92 var snapst SnapState 93 if err := Get(st, rc.InstanceName(), &snapst); err != nil { 94 return nil, nil, err 95 } 96 return &rc.SnapSetup, &snapst, nil 97 } 98 99 // soundness check 100 var _ readyUpdateInfo = (*refreshCandidate)(nil) 101 102 // autoRefresh will ensure that snaps are refreshed automatically 103 // according to the refresh schedule. 104 type autoRefresh struct { 105 state *state.State 106 107 lastRefreshSchedule string 108 nextRefresh time.Time 109 lastRefreshAttempt time.Time 110 managedDeniedLogged bool 111 } 112 113 func newAutoRefresh(st *state.State) *autoRefresh { 114 return &autoRefresh{ 115 state: st, 116 } 117 } 118 119 // RefreshSchedule will return a user visible string with the current schedule 120 // for the automatic refreshes and a flag indicating whether the schedule is a 121 // legacy one. 122 func (m *autoRefresh) RefreshSchedule() (schedule string, legacy bool, err error) { 123 _, schedule, legacy, err = m.refreshScheduleWithDefaultsFallback() 124 return schedule, legacy, err 125 } 126 127 // NextRefresh returns when the next automatic refresh will happen. 128 func (m *autoRefresh) NextRefresh() time.Time { 129 return m.nextRefresh 130 } 131 132 // LastRefresh returns when the last refresh happened. 133 func (m *autoRefresh) LastRefresh() (time.Time, error) { 134 return getTime(m.state, "last-refresh") 135 } 136 137 // EffectiveRefreshHold returns the time until to which refreshes are 138 // held if refresh.hold configuration is set and accounting for the 139 // max postponement since the last refresh. 140 func (m *autoRefresh) EffectiveRefreshHold() (time.Time, error) { 141 var holdTime time.Time 142 143 tr := config.NewTransaction(m.state) 144 err := tr.Get("core", "refresh.hold", &holdTime) 145 if err != nil && !config.IsNoOption(err) { 146 return time.Time{}, err 147 } 148 149 // cannot hold beyond last-refresh + max-postponement 150 lastRefresh, err := m.LastRefresh() 151 if err != nil { 152 return time.Time{}, err 153 } 154 if lastRefresh.IsZero() { 155 seedTime, err := getTime(m.state, "seed-time") 156 if err != nil { 157 return time.Time{}, err 158 } 159 if seedTime.IsZero() { 160 // no reference to know whether holding is reasonable 161 return time.Time{}, nil 162 } 163 lastRefresh = seedTime 164 } 165 166 limitTime := lastRefresh.Add(maxPostponement) 167 if holdTime.After(limitTime) { 168 return limitTime, nil 169 } 170 171 return holdTime, nil 172 } 173 174 func (m *autoRefresh) ensureRefreshHoldAtLeast(duration time.Duration) error { 175 now := time.Now() 176 177 // get the effective refresh hold and check if it is sooner than the 178 // specified duration in the future 179 effective, err := m.EffectiveRefreshHold() 180 if err != nil { 181 return err 182 } 183 184 if effective.IsZero() || effective.Sub(now) < duration { 185 // the effective refresh hold is sooner than the desired delay, so 186 // move it out to the specified duration 187 holdTime := now.Add(duration) 188 tr := config.NewTransaction(m.state) 189 err := tr.Set("core", "refresh.hold", &holdTime) 190 if err != nil && !config.IsNoOption(err) { 191 return err 192 } 193 tr.Commit() 194 } 195 196 return nil 197 } 198 199 // clearRefreshHold clears refresh.hold configuration. 200 func (m *autoRefresh) clearRefreshHold() { 201 tr := config.NewTransaction(m.state) 202 tr.Set("core", "refresh.hold", nil) 203 tr.Commit() 204 } 205 206 // AtSeed configures refresh policies at end of seeding. 207 func (m *autoRefresh) AtSeed() error { 208 // on classic hold refreshes for 2h after seeding 209 if release.OnClassic { 210 var t1 time.Time 211 tr := config.NewTransaction(m.state) 212 err := tr.Get("core", "refresh.hold", &t1) 213 if !config.IsNoOption(err) { 214 // already set or error 215 return err 216 } 217 // TODO: have a policy that if the snapd exe itself 218 // is older than X weeks/months we skip the holding? 219 now := time.Now().UTC() 220 tr.Set("core", "refresh.hold", now.Add(2*time.Hour)) 221 tr.Commit() 222 m.nextRefresh = now 223 } 224 return nil 225 } 226 227 func canRefreshOnMeteredConnection(st *state.State) (bool, error) { 228 tr := config.NewTransaction(st) 229 var onMetered string 230 err := tr.GetMaybe("core", "refresh.metered", &onMetered) 231 if err != nil && err != state.ErrNoState { 232 return false, err 233 } 234 235 return onMetered != "hold", nil 236 } 237 238 func (m *autoRefresh) canRefreshRespectingMetered(now, lastRefresh time.Time) (can bool, err error) { 239 can, err = canRefreshOnMeteredConnection(m.state) 240 if err != nil { 241 return false, err 242 } 243 if can { 244 return true, nil 245 } 246 247 // ignore any errors that occurred while checking if we are on a metered 248 // connection 249 metered, _ := IsOnMeteredConnection() 250 if !metered { 251 return true, nil 252 } 253 254 if now.Sub(lastRefresh) >= maxPostponement { 255 // TODO use warnings when the infra becomes available 256 logger.Noticef("Auto refresh disabled while on metered connections, but pending for too long (%d days). Trying to refresh now.", int(maxPostponement.Hours()/24)) 257 return true, nil 258 } 259 260 logger.Debugf("Auto refresh disabled on metered connections") 261 262 return false, nil 263 } 264 265 // Ensure ensures that we refresh all installed snaps periodically 266 func (m *autoRefresh) Ensure() error { 267 m.state.Lock() 268 defer m.state.Unlock() 269 270 // see if it even makes sense to try to refresh 271 if CanAutoRefresh == nil { 272 return nil 273 } 274 if ok, err := CanAutoRefresh(m.state); err != nil || !ok { 275 return err 276 } 277 278 // get lastRefresh and schedule 279 lastRefresh, err := m.LastRefresh() 280 if err != nil { 281 return err 282 } 283 284 refreshSchedule, refreshScheduleStr, _, err := m.refreshScheduleWithDefaultsFallback() 285 if err != nil { 286 return err 287 } 288 if len(refreshSchedule) == 0 { 289 m.nextRefresh = time.Time{} 290 return nil 291 } 292 // we already have a refresh time, check if we got a new config 293 if !m.nextRefresh.IsZero() { 294 if m.lastRefreshSchedule != refreshScheduleStr { 295 // the refresh schedule has changed 296 logger.Debugf("Refresh timer changed.") 297 m.nextRefresh = time.Time{} 298 } 299 } 300 m.lastRefreshSchedule = refreshScheduleStr 301 302 // ensure nothing is in flight already 303 if autoRefreshInFlight(m.state) { 304 return nil 305 } 306 307 now := time.Now() 308 // compute next refresh attempt time (if needed) 309 if m.nextRefresh.IsZero() { 310 // store attempts in memory so that we can backoff 311 if !lastRefresh.IsZero() { 312 delta := timeutil.Next(refreshSchedule, lastRefresh, maxPostponement) 313 now = time.Now() 314 m.nextRefresh = now.Add(delta) 315 } else { 316 // make sure either seed-time or last-refresh 317 // are set for hold code below 318 m.ensureLastRefreshAnchor() 319 // immediate 320 m.nextRefresh = now 321 } 322 logger.Debugf("Next refresh scheduled for %s.", m.nextRefresh.Format(time.RFC3339)) 323 } 324 325 held, holdTime, err := m.isRefreshHeld() 326 if err != nil { 327 return err 328 } 329 330 // do refresh attempt (if needed) 331 if !held { 332 if !holdTime.IsZero() { 333 // expired hold case 334 m.clearRefreshHold() 335 if m.nextRefresh.Before(holdTime) { 336 // next refresh is obsolete, compute the next one 337 delta := timeutil.Next(refreshSchedule, holdTime, maxPostponement) 338 now = time.Now() 339 m.nextRefresh = now.Add(delta) 340 } 341 } 342 343 // refresh is also "held" if the next time is in the future 344 // note that the two times here could be exactly equal, so we use 345 // !After() because that is true in the case that the next refresh is 346 // before now, and the next refresh is equal to now without requiring an 347 // or operation 348 if !m.nextRefresh.After(now) { 349 var can bool 350 can, err = m.canRefreshRespectingMetered(now, lastRefresh) 351 if err != nil { 352 return err 353 } 354 if !can { 355 // clear nextRefresh so that another refresh time is calculated 356 m.nextRefresh = time.Time{} 357 return nil 358 } 359 360 // Check that we have reasonable delays between attempts. 361 // If the store is under stress we need to make sure we do not 362 // hammer it too often 363 if !m.lastRefreshAttempt.IsZero() && m.lastRefreshAttempt.Add(refreshRetryDelay).After(time.Now()) { 364 return nil 365 } 366 367 err = m.launchAutoRefresh(refreshSchedule) 368 if _, ok := err.(*httputil.PersistentNetworkError); !ok { 369 m.nextRefresh = time.Time{} 370 } // else - refresh will be retried after refreshRetryDelay 371 } 372 } 373 374 return err 375 } 376 377 // isRefreshHeld returns whether an auto-refresh is currently held back or not, 378 // as indicated by m.EffectiveRefreshHold(). 379 func (m *autoRefresh) isRefreshHeld() (bool, time.Time, error) { 380 now := time.Now() 381 // should we hold back refreshes? 382 holdTime, err := m.EffectiveRefreshHold() 383 if err != nil { 384 return false, time.Time{}, err 385 } 386 if holdTime.After(now) { 387 return true, holdTime, nil 388 } 389 390 return false, holdTime, nil 391 } 392 393 func (m *autoRefresh) ensureLastRefreshAnchor() { 394 seedTime, _ := getTime(m.state, "seed-time") 395 if !seedTime.IsZero() { 396 return 397 } 398 399 // last core refresh 400 coreRefreshDate := snap.InstallDate("core") 401 if !coreRefreshDate.IsZero() { 402 m.state.Set("last-refresh", coreRefreshDate) 403 return 404 } 405 406 // fallback to executable time 407 st, err := os.Stat("/proc/self/exe") 408 if err == nil { 409 m.state.Set("last-refresh", st.ModTime()) 410 return 411 } 412 } 413 414 // refreshScheduleWithDefaultsFallback returns the current refresh schedule 415 // and refresh string. When an invalid refresh schedule is set by the user 416 // the refresh schedule is automatically reset to the default. 417 // 418 // TODO: we can remove the refreshSchedule reset because we have validation 419 // of the schedule now. 420 func (m *autoRefresh) refreshScheduleWithDefaultsFallback() (ts []*timeutil.Schedule, scheduleAsStr string, legacy bool, err error) { 421 managed, requested, legacy := refreshScheduleManaged(m.state) 422 if managed { 423 if m.lastRefreshSchedule != "managed" { 424 logger.Noticef("refresh is managed via the snapd-control interface") 425 m.lastRefreshSchedule = "managed" 426 } 427 m.managedDeniedLogged = false 428 return nil, "managed", legacy, nil 429 } else if requested { 430 // managed refresh schedule was denied 431 if !m.managedDeniedLogged { 432 logger.Noticef("managed refresh schedule denied, no properly configured snapd-control") 433 m.managedDeniedLogged = true 434 } 435 // fallback to default schedule 436 return refreshScheduleDefault() 437 } else { 438 m.managedDeniedLogged = false 439 } 440 441 tr := config.NewTransaction(m.state) 442 // try the new refresh.timer config option first 443 err = tr.Get("core", "refresh.timer", &scheduleAsStr) 444 if err != nil && !config.IsNoOption(err) { 445 return nil, "", false, err 446 } 447 if scheduleAsStr != "" { 448 ts, err = timeutil.ParseSchedule(scheduleAsStr) 449 if err != nil { 450 logger.Noticef("cannot use refresh.timer configuration: %s", err) 451 return refreshScheduleDefault() 452 } 453 return ts, scheduleAsStr, false, nil 454 } 455 456 // fallback to legacy refresh.schedule setting when the new 457 // config option is not set 458 err = tr.Get("core", "refresh.schedule", &scheduleAsStr) 459 if err != nil && !config.IsNoOption(err) { 460 return nil, "", false, err 461 } 462 if scheduleAsStr != "" { 463 ts, err = timeutil.ParseLegacySchedule(scheduleAsStr) 464 if err != nil { 465 logger.Noticef("cannot use refresh.schedule configuration: %s", err) 466 return refreshScheduleDefault() 467 } 468 return ts, scheduleAsStr, true, nil 469 } 470 471 return refreshScheduleDefault() 472 } 473 474 // launchAutoRefresh creates the auto-refresh taskset and a change for it. 475 func (m *autoRefresh) launchAutoRefresh(refreshSchedule []*timeutil.Schedule) error { 476 perfTimings := timings.New(map[string]string{"ensure": "auto-refresh"}) 477 tm := perfTimings.StartSpan("auto-refresh", "query store and setup auto-refresh change") 478 defer func() { 479 tm.Stop() 480 perfTimings.Save(m.state) 481 }() 482 483 m.lastRefreshAttempt = time.Now() 484 485 // NOTE: this will unlock and re-lock state for network ops 486 updated, tasksets, err := AutoRefresh(auth.EnsureContextTODO(), m.state) 487 488 // TODO: we should have some way to lock just creating and starting changes, 489 // as that would alleviate this race condition we are guarding against 490 // with this check and probably would eliminate other similar race 491 // conditions elsewhere 492 493 // re-check if the refresh is held because it could have been re-held and 494 // pushed back, in which case we need to abort the auto-refresh and wait 495 held, _, holdErr := m.isRefreshHeld() 496 if holdErr != nil { 497 return holdErr 498 } 499 500 if held { 501 // then a request came in that pushed the refresh out, so we will need 502 // to try again later 503 logger.Noticef("Auto-refresh was delayed mid-way through launching, aborting to try again later") 504 return nil 505 } 506 507 if _, ok := err.(*httputil.PersistentNetworkError); ok { 508 logger.Noticef("Cannot prepare auto-refresh change due to a permanent network error: %s", err) 509 return err 510 } 511 m.state.Set("last-refresh", time.Now()) 512 if err != nil { 513 logger.Noticef("Cannot prepare auto-refresh change: %s", err) 514 return err 515 } 516 517 var msg string 518 switch len(updated) { 519 case 0: 520 logger.Noticef(i18n.G("auto-refresh: all snaps are up-to-date")) 521 return nil 522 case 1: 523 msg = fmt.Sprintf(i18n.G("Auto-refresh snap %q"), updated[0]) 524 case 2, 3: 525 quoted := strutil.Quoted(updated) 526 // TRANSLATORS: the %s is a comma-separated list of quoted snap names 527 msg = fmt.Sprintf(i18n.G("Auto-refresh snaps %s"), quoted) 528 default: 529 msg = fmt.Sprintf(i18n.G("Auto-refresh %d snaps"), len(updated)) 530 } 531 532 chg := m.state.NewChange("auto-refresh", msg) 533 for _, ts := range tasksets { 534 chg.AddAll(ts) 535 } 536 chg.Set("snap-names", updated) 537 chg.Set("api-data", map[string]interface{}{"snap-names": updated}) 538 state.TagTimingsWithChange(perfTimings, chg) 539 540 return nil 541 } 542 543 func refreshScheduleDefault() (ts []*timeutil.Schedule, scheduleStr string, legacy bool, err error) { 544 refreshSchedule, err := timeutil.ParseSchedule(defaultRefreshSchedule) 545 if err != nil { 546 panic(fmt.Sprintf("defaultRefreshSchedule cannot be parsed: %s", err)) 547 } 548 549 return refreshSchedule, defaultRefreshSchedule, false, nil 550 } 551 552 func autoRefreshInFlight(st *state.State) bool { 553 for _, chg := range st.Changes() { 554 if chg.Kind() == "auto-refresh" && !chg.Status().Ready() { 555 return true 556 } 557 } 558 return false 559 } 560 561 // refreshScheduleManaged returns true if the refresh schedule of the 562 // device is managed by an external snap 563 func refreshScheduleManaged(st *state.State) (managed, requested, legacy bool) { 564 var confStr string 565 566 // this will only be "nil" if running in tests 567 if CanManageRefreshes == nil { 568 return false, false, legacy 569 } 570 571 // check new style timer first 572 tr := config.NewTransaction(st) 573 err := tr.Get("core", "refresh.timer", &confStr) 574 if err != nil && !config.IsNoOption(err) { 575 return false, false, legacy 576 } 577 // if not set, fallback to refresh.schedule 578 if confStr == "" { 579 if err := tr.Get("core", "refresh.schedule", &confStr); err != nil { 580 return false, false, legacy 581 } 582 legacy = true 583 } 584 585 if confStr != "managed" { 586 return false, false, legacy 587 } 588 589 return CanManageRefreshes(st), true, legacy 590 } 591 592 // getTime retrieves a time from a state value. 593 func getTime(st *state.State, timeKey string) (time.Time, error) { 594 var t1 time.Time 595 err := st.Get(timeKey, &t1) 596 if err != nil && err != state.ErrNoState { 597 return time.Time{}, err 598 } 599 return t1, nil 600 } 601 602 // asyncPendingRefreshNotification broadcasts desktop notification in a goroutine. 603 // 604 // This allows the, possibly slow, communication with each snapd session agent, 605 // to be performed without holding the snap state lock. 606 var asyncPendingRefreshNotification = func(context context.Context, client *userclient.Client, refreshInfo *userclient.PendingSnapRefreshInfo) { 607 go func() { 608 if err := client.PendingRefreshNotification(context, refreshInfo); err != nil { 609 logger.Noticef("Cannot send notification about pending refresh: %v", err) 610 } 611 }() 612 } 613 614 // inhibitRefresh returns an error if refresh is inhibited by running apps. 615 // 616 // Internally the snap state is updated to remember when the inhibition first 617 // took place. Apps can inhibit refreshes for up to "maxInhibition", beyond 618 // that period the refresh will go ahead despite application activity. 619 func inhibitRefresh(st *state.State, snapst *SnapState, info *snap.Info, checker func(*snap.Info) error) error { 620 checkerErr := checker(info) 621 if checkerErr == nil { 622 return nil 623 } 624 625 // Get pending refresh information from compatible errors or synthesize a new one. 626 var refreshInfo *userclient.PendingSnapRefreshInfo 627 if err, ok := checkerErr.(*BusySnapError); ok { 628 refreshInfo = err.PendingSnapRefreshInfo() 629 } else { 630 refreshInfo = &userclient.PendingSnapRefreshInfo{ 631 InstanceName: info.InstanceName(), 632 } 633 } 634 635 // Decide on what to do depending on the state of the snap and the remaining 636 // inhibition time. 637 now := time.Now() 638 switch { 639 case snapst.RefreshInhibitedTime == nil: 640 // If the snap did not have inhibited refresh yet then commence a new 641 // window, during which refreshes are postponed, by storing the current 642 // time in the snap state's RefreshInhibitedTime field. This field is 643 // reset to nil on successful refresh. 644 snapst.RefreshInhibitedTime = &now 645 refreshInfo.TimeRemaining = (maxInhibition - now.Sub(*snapst.RefreshInhibitedTime)).Truncate(time.Second) 646 Set(st, info.InstanceName(), snapst) 647 case now.Sub(*snapst.RefreshInhibitedTime) < maxInhibition: 648 // If we are still in the allowed window then just return the error but 649 // don't change the snap state again. 650 // TODO: as time left shrinks, send additional notifications with 651 // increasing frequency, allowing the user to understand the urgency. 652 refreshInfo.TimeRemaining = (maxInhibition - now.Sub(*snapst.RefreshInhibitedTime)).Truncate(time.Second) 653 default: 654 // If we run out of time then consume the error that would normally 655 // inhibit refresh and notify the user that the snap is refreshing right 656 // now, by not setting the TimeRemaining field of the refresh 657 // notification message. 658 checkerErr = nil 659 } 660 661 // Send the notification asynchronously to avoid holding the state lock. 662 asyncPendingRefreshNotification(context.TODO(), userclient.New(), refreshInfo) 663 return checkerErr 664 } 665 666 // for testing outside of snapstate 667 func MockRefreshCandidate(snapSetup *SnapSetup, version string) interface{} { 668 return &refreshCandidate{ 669 SnapSetup: *snapSetup, 670 Version: version, 671 } 672 }