github.com/kubiko/snapd@v0.0.0-20201013125620-d4f3094d9ddf/overlord/snapstate/autorefresh.go (about)

     1  // -*- Mode: Go; indent-tabs-mode: t -*-
     2  
     3  /*
     4   * Copyright (C) 2017-2020 Canonical Ltd
     5   *
     6   * This program is free software: you can redistribute it and/or modify
     7   * it under the terms of the GNU General Public License version 3 as
     8   * published by the Free Software Foundation.
     9   *
    10   * This program is distributed in the hope that it will be useful,
    11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
    12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13   * GNU General Public License for more details.
    14   *
    15   * You should have received a copy of the GNU General Public License
    16   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17   *
    18   */
    19  
    20  package snapstate
    21  
    22  import (
    23  	"fmt"
    24  	"os"
    25  	"time"
    26  
    27  	"github.com/snapcore/snapd/httputil"
    28  	"github.com/snapcore/snapd/i18n"
    29  	"github.com/snapcore/snapd/logger"
    30  	"github.com/snapcore/snapd/overlord/auth"
    31  	"github.com/snapcore/snapd/overlord/configstate/config"
    32  	"github.com/snapcore/snapd/overlord/state"
    33  	"github.com/snapcore/snapd/release"
    34  	"github.com/snapcore/snapd/snap"
    35  	"github.com/snapcore/snapd/strutil"
    36  	"github.com/snapcore/snapd/timeutil"
    37  	"github.com/snapcore/snapd/timings"
    38  )
    39  
    40  // the default refresh pattern
    41  const defaultRefreshSchedule = "00:00~24:00/4"
    42  
    43  // cannot keep without refreshing for more than maxPostponement
    44  const maxPostponement = 60 * 24 * time.Hour
    45  
    46  // cannot inhibit refreshes for more than maxInhibition
    47  const maxInhibition = 14 * 24 * time.Hour
    48  
    49  // hooks setup by devicestate
    50  var (
    51  	CanAutoRefresh        func(st *state.State) (bool, error)
    52  	CanManageRefreshes    func(st *state.State) bool
    53  	IsOnMeteredConnection func() (bool, error)
    54  )
    55  
    56  // refreshRetryDelay specified the minimum time to retry failed refreshes
    57  var refreshRetryDelay = 20 * time.Minute
    58  
    59  // autoRefresh will ensure that snaps are refreshed automatically
    60  // according to the refresh schedule.
    61  type autoRefresh struct {
    62  	state *state.State
    63  
    64  	lastRefreshSchedule string
    65  	nextRefresh         time.Time
    66  	lastRefreshAttempt  time.Time
    67  	managedDeniedLogged bool
    68  }
    69  
    70  func newAutoRefresh(st *state.State) *autoRefresh {
    71  	return &autoRefresh{
    72  		state: st,
    73  	}
    74  }
    75  
    76  // RefreshSchedule will return a user visible string with the current schedule
    77  // for the automatic refreshes and a flag indicating whether the schedule is a
    78  // legacy one.
    79  func (m *autoRefresh) RefreshSchedule() (schedule string, legacy bool, err error) {
    80  	_, schedule, legacy, err = m.refreshScheduleWithDefaultsFallback()
    81  	return schedule, legacy, err
    82  }
    83  
    84  // NextRefresh returns when the next automatic refresh will happen.
    85  func (m *autoRefresh) NextRefresh() time.Time {
    86  	return m.nextRefresh
    87  }
    88  
    89  // LastRefresh returns when the last refresh happened.
    90  func (m *autoRefresh) LastRefresh() (time.Time, error) {
    91  	return getTime(m.state, "last-refresh")
    92  }
    93  
    94  // EffectiveRefreshHold returns the time until to which refreshes are
    95  // held if refresh.hold configuration is set and accounting for the
    96  // max postponement since the last refresh.
    97  func (m *autoRefresh) EffectiveRefreshHold() (time.Time, error) {
    98  	var holdTime time.Time
    99  
   100  	tr := config.NewTransaction(m.state)
   101  	err := tr.Get("core", "refresh.hold", &holdTime)
   102  	if err != nil && !config.IsNoOption(err) {
   103  		return time.Time{}, err
   104  	}
   105  
   106  	// cannot hold beyond last-refresh + max-postponement
   107  	lastRefresh, err := m.LastRefresh()
   108  	if err != nil {
   109  		return time.Time{}, err
   110  	}
   111  	if lastRefresh.IsZero() {
   112  		seedTime, err := getTime(m.state, "seed-time")
   113  		if err != nil {
   114  			return time.Time{}, err
   115  		}
   116  		if seedTime.IsZero() {
   117  			// no reference to know whether holding is reasonable
   118  			return time.Time{}, nil
   119  		}
   120  		lastRefresh = seedTime
   121  	}
   122  
   123  	limitTime := lastRefresh.Add(maxPostponement)
   124  	if holdTime.After(limitTime) {
   125  		return limitTime, nil
   126  	}
   127  
   128  	return holdTime, nil
   129  }
   130  
   131  // clearRefreshHold clears refresh.hold configuration.
   132  func (m *autoRefresh) clearRefreshHold() {
   133  	tr := config.NewTransaction(m.state)
   134  	tr.Set("core", "refresh.hold", nil)
   135  	tr.Commit()
   136  }
   137  
   138  // AtSeed configures refresh policies at end of seeding.
   139  func (m *autoRefresh) AtSeed() error {
   140  	// on classic hold refreshes for 2h after seeding
   141  	if release.OnClassic {
   142  		var t1 time.Time
   143  		tr := config.NewTransaction(m.state)
   144  		err := tr.Get("core", "refresh.hold", &t1)
   145  		if !config.IsNoOption(err) {
   146  			// already set or error
   147  			return err
   148  		}
   149  		// TODO: have a policy that if the snapd exe itself
   150  		// is older than X weeks/months we skip the holding?
   151  		now := time.Now().UTC()
   152  		tr.Set("core", "refresh.hold", now.Add(2*time.Hour))
   153  		tr.Commit()
   154  		m.nextRefresh = now
   155  	}
   156  	return nil
   157  }
   158  
   159  func canRefreshOnMeteredConnection(st *state.State) (bool, error) {
   160  	tr := config.NewTransaction(st)
   161  	var onMetered string
   162  	err := tr.GetMaybe("core", "refresh.metered", &onMetered)
   163  	if err != nil && err != state.ErrNoState {
   164  		return false, err
   165  	}
   166  
   167  	return onMetered != "hold", nil
   168  }
   169  
   170  func (m *autoRefresh) canRefreshRespectingMetered(now, lastRefresh time.Time) (can bool, err error) {
   171  	can, err = canRefreshOnMeteredConnection(m.state)
   172  	if err != nil {
   173  		return false, err
   174  	}
   175  	if can {
   176  		return true, nil
   177  	}
   178  
   179  	// ignore any errors that occurred while checking if we are on a metered
   180  	// connection
   181  	metered, _ := IsOnMeteredConnection()
   182  	if !metered {
   183  		return true, nil
   184  	}
   185  
   186  	if now.Sub(lastRefresh) >= maxPostponement {
   187  		// TODO use warnings when the infra becomes available
   188  		logger.Noticef("Auto refresh disabled while on metered connections, but pending for too long (%d days). Trying to refresh now.", int(maxPostponement.Hours()/24))
   189  		return true, nil
   190  	}
   191  
   192  	logger.Debugf("Auto refresh disabled on metered connections")
   193  
   194  	return false, nil
   195  }
   196  
   197  // Ensure ensures that we refresh all installed snaps periodically
   198  func (m *autoRefresh) Ensure() error {
   199  	m.state.Lock()
   200  	defer m.state.Unlock()
   201  
   202  	// see if it even makes sense to try to refresh
   203  	if CanAutoRefresh == nil {
   204  		return nil
   205  	}
   206  	if ok, err := CanAutoRefresh(m.state); err != nil || !ok {
   207  		return err
   208  	}
   209  
   210  	// get lastRefresh and schedule
   211  	lastRefresh, err := m.LastRefresh()
   212  	if err != nil {
   213  		return err
   214  	}
   215  
   216  	refreshSchedule, refreshScheduleStr, _, err := m.refreshScheduleWithDefaultsFallback()
   217  	if err != nil {
   218  		return err
   219  	}
   220  	if len(refreshSchedule) == 0 {
   221  		m.nextRefresh = time.Time{}
   222  		return nil
   223  	}
   224  	// we already have a refresh time, check if we got a new config
   225  	if !m.nextRefresh.IsZero() {
   226  		if m.lastRefreshSchedule != refreshScheduleStr {
   227  			// the refresh schedule has changed
   228  			logger.Debugf("Refresh timer changed.")
   229  			m.nextRefresh = time.Time{}
   230  		}
   231  	}
   232  	m.lastRefreshSchedule = refreshScheduleStr
   233  
   234  	// ensure nothing is in flight already
   235  	if autoRefreshInFlight(m.state) {
   236  		return nil
   237  	}
   238  
   239  	now := time.Now()
   240  	// compute next refresh attempt time (if needed)
   241  	if m.nextRefresh.IsZero() {
   242  		// store attempts in memory so that we can backoff
   243  		if !lastRefresh.IsZero() {
   244  			delta := timeutil.Next(refreshSchedule, lastRefresh, maxPostponement)
   245  			now = time.Now()
   246  			m.nextRefresh = now.Add(delta)
   247  		} else {
   248  			// make sure either seed-time or last-refresh
   249  			// are set for hold code below
   250  			m.ensureLastRefreshAnchor()
   251  			// immediate
   252  			m.nextRefresh = now
   253  		}
   254  		logger.Debugf("Next refresh scheduled for %s.", m.nextRefresh.Format(time.RFC3339))
   255  	}
   256  
   257  	held, holdTime, err := m.isRefreshHeld(refreshSchedule)
   258  	if err != nil {
   259  		return err
   260  	}
   261  
   262  	// do refresh attempt (if needed)
   263  	if !held {
   264  		if !holdTime.IsZero() {
   265  			// expired hold case
   266  			m.clearRefreshHold()
   267  			if m.nextRefresh.Before(holdTime) {
   268  				// next refresh is obsolete, compute the next one
   269  				delta := timeutil.Next(refreshSchedule, holdTime, maxPostponement)
   270  				now = time.Now()
   271  				m.nextRefresh = now.Add(delta)
   272  			}
   273  		}
   274  
   275  		// refresh is also "held" if the next time is in the future
   276  		// note that the two times here could be exactly equal, so we use
   277  		// !After() because that is true in the case that the next refresh is
   278  		// before now, and the next refresh is equal to now without requiring an
   279  		// or operation
   280  		if !m.nextRefresh.After(now) {
   281  			var can bool
   282  			can, err = m.canRefreshRespectingMetered(now, lastRefresh)
   283  			if err != nil {
   284  				return err
   285  			}
   286  			if !can {
   287  				// clear nextRefresh so that another refresh time is calculated
   288  				m.nextRefresh = time.Time{}
   289  				return nil
   290  			}
   291  
   292  			// Check that we have reasonable delays between attempts.
   293  			// If the store is under stress we need to make sure we do not
   294  			// hammer it too often
   295  			if !m.lastRefreshAttempt.IsZero() && m.lastRefreshAttempt.Add(refreshRetryDelay).After(time.Now()) {
   296  				return nil
   297  			}
   298  
   299  			err = m.launchAutoRefresh(refreshSchedule)
   300  			if _, ok := err.(*httputil.PersistentNetworkError); !ok {
   301  				m.nextRefresh = time.Time{}
   302  			} // else - refresh will be retried after refreshRetryDelay
   303  		}
   304  	}
   305  
   306  	return err
   307  }
   308  
   309  // isRefreshHeld returns whether an auto-refresh is currently held back or not,
   310  // as indicated by m.EffectiveRefreshHold().
   311  func (m *autoRefresh) isRefreshHeld(refreshSchedule []*timeutil.Schedule) (bool, time.Time, error) {
   312  	now := time.Now()
   313  	// should we hold back refreshes?
   314  	holdTime, err := m.EffectiveRefreshHold()
   315  	if err != nil {
   316  		return false, time.Time{}, err
   317  	}
   318  	if holdTime.After(now) {
   319  		return true, holdTime, nil
   320  	}
   321  
   322  	return false, holdTime, nil
   323  }
   324  
   325  func (m *autoRefresh) ensureLastRefreshAnchor() {
   326  	seedTime, _ := getTime(m.state, "seed-time")
   327  	if !seedTime.IsZero() {
   328  		return
   329  	}
   330  
   331  	// last core refresh
   332  	coreRefreshDate := snap.InstallDate("core")
   333  	if !coreRefreshDate.IsZero() {
   334  		m.state.Set("last-refresh", coreRefreshDate)
   335  		return
   336  	}
   337  
   338  	// fallback to executable time
   339  	st, err := os.Stat("/proc/self/exe")
   340  	if err == nil {
   341  		m.state.Set("last-refresh", st.ModTime())
   342  		return
   343  	}
   344  }
   345  
   346  // refreshScheduleWithDefaultsFallback returns the current refresh schedule
   347  // and refresh string. When an invalid refresh schedule is set by the user
   348  // the refresh schedule is automatically reset to the default.
   349  //
   350  // TODO: we can remove the refreshSchedule reset because we have validation
   351  //       of the schedule now.
   352  func (m *autoRefresh) refreshScheduleWithDefaultsFallback() (ts []*timeutil.Schedule, scheduleAsStr string, legacy bool, err error) {
   353  	managed, requested, legacy := refreshScheduleManaged(m.state)
   354  	if managed {
   355  		if m.lastRefreshSchedule != "managed" {
   356  			logger.Noticef("refresh is managed via the snapd-control interface")
   357  			m.lastRefreshSchedule = "managed"
   358  		}
   359  		m.managedDeniedLogged = false
   360  		return nil, "managed", legacy, nil
   361  	} else if requested {
   362  		// managed refresh schedule was denied
   363  		if !m.managedDeniedLogged {
   364  			logger.Noticef("managed refresh schedule denied, no properly configured snapd-control")
   365  			m.managedDeniedLogged = true
   366  		}
   367  		// fallback to default schedule
   368  		return refreshScheduleDefault()
   369  	} else {
   370  		m.managedDeniedLogged = false
   371  	}
   372  
   373  	tr := config.NewTransaction(m.state)
   374  	// try the new refresh.timer config option first
   375  	err = tr.Get("core", "refresh.timer", &scheduleAsStr)
   376  	if err != nil && !config.IsNoOption(err) {
   377  		return nil, "", false, err
   378  	}
   379  	if scheduleAsStr != "" {
   380  		ts, err = timeutil.ParseSchedule(scheduleAsStr)
   381  		if err != nil {
   382  			logger.Noticef("cannot use refresh.timer configuration: %s", err)
   383  			return refreshScheduleDefault()
   384  		}
   385  		return ts, scheduleAsStr, false, nil
   386  	}
   387  
   388  	// fallback to legacy refresh.schedule setting when the new
   389  	// config option is not set
   390  	err = tr.Get("core", "refresh.schedule", &scheduleAsStr)
   391  	if err != nil && !config.IsNoOption(err) {
   392  		return nil, "", false, err
   393  	}
   394  	if scheduleAsStr != "" {
   395  		ts, err = timeutil.ParseLegacySchedule(scheduleAsStr)
   396  		if err != nil {
   397  			logger.Noticef("cannot use refresh.schedule configuration: %s", err)
   398  			return refreshScheduleDefault()
   399  		}
   400  		return ts, scheduleAsStr, true, nil
   401  	}
   402  
   403  	return refreshScheduleDefault()
   404  }
   405  
   406  // launchAutoRefresh creates the auto-refresh taskset and a change for it.
   407  func (m *autoRefresh) launchAutoRefresh(refreshSchedule []*timeutil.Schedule) error {
   408  	perfTimings := timings.New(map[string]string{"ensure": "auto-refresh"})
   409  	tm := perfTimings.StartSpan("auto-refresh", "query store and setup auto-refresh change")
   410  	defer func() {
   411  		tm.Stop()
   412  		perfTimings.Save(m.state)
   413  	}()
   414  
   415  	m.lastRefreshAttempt = time.Now()
   416  
   417  	// NOTE: this will unlock and re-lock state for network ops
   418  	updated, tasksets, err := AutoRefresh(auth.EnsureContextTODO(), m.state)
   419  
   420  	// TODO: we should have some way to lock just creating and starting changes,
   421  	//       as that would alleviate this race condition we are guarding against
   422  	//       with this check and probably would eliminate other similar race
   423  	//       conditions elsewhere
   424  
   425  	// re-check if the refresh is held because it could have been re-held and
   426  	// pushed back, in which case we need to abort the auto-refresh and wait
   427  	held, _, holdErr := m.isRefreshHeld(refreshSchedule)
   428  	if holdErr != nil {
   429  		return holdErr
   430  	}
   431  
   432  	if held {
   433  		// then a request came in that pushed the refresh out, so we will need
   434  		// to try again later
   435  		logger.Noticef("Auto-refresh was delayed mid-way through launching, aborting to try again later")
   436  		return nil
   437  	}
   438  
   439  	if _, ok := err.(*httputil.PersistentNetworkError); ok {
   440  		logger.Noticef("Cannot prepare auto-refresh change due to a permanent network error: %s", err)
   441  		return err
   442  	}
   443  	m.state.Set("last-refresh", time.Now())
   444  	if err != nil {
   445  		logger.Noticef("Cannot prepare auto-refresh change: %s", err)
   446  		return err
   447  	}
   448  
   449  	var msg string
   450  	switch len(updated) {
   451  	case 0:
   452  		logger.Noticef(i18n.G("auto-refresh: all snaps are up-to-date"))
   453  		return nil
   454  	case 1:
   455  		msg = fmt.Sprintf(i18n.G("Auto-refresh snap %q"), updated[0])
   456  	case 2, 3:
   457  		quoted := strutil.Quoted(updated)
   458  		// TRANSLATORS: the %s is a comma-separated list of quoted snap names
   459  		msg = fmt.Sprintf(i18n.G("Auto-refresh snaps %s"), quoted)
   460  	default:
   461  		msg = fmt.Sprintf(i18n.G("Auto-refresh %d snaps"), len(updated))
   462  	}
   463  
   464  	chg := m.state.NewChange("auto-refresh", msg)
   465  	for _, ts := range tasksets {
   466  		chg.AddAll(ts)
   467  	}
   468  	chg.Set("snap-names", updated)
   469  	chg.Set("api-data", map[string]interface{}{"snap-names": updated})
   470  	state.TagTimingsWithChange(perfTimings, chg)
   471  
   472  	return nil
   473  }
   474  
   475  func refreshScheduleDefault() (ts []*timeutil.Schedule, scheduleStr string, legacy bool, err error) {
   476  	refreshSchedule, err := timeutil.ParseSchedule(defaultRefreshSchedule)
   477  	if err != nil {
   478  		panic(fmt.Sprintf("defaultRefreshSchedule cannot be parsed: %s", err))
   479  	}
   480  
   481  	return refreshSchedule, defaultRefreshSchedule, false, nil
   482  }
   483  
   484  func autoRefreshInFlight(st *state.State) bool {
   485  	for _, chg := range st.Changes() {
   486  		if chg.Kind() == "auto-refresh" && !chg.Status().Ready() {
   487  			return true
   488  		}
   489  	}
   490  	return false
   491  }
   492  
   493  // refreshScheduleManaged returns true if the refresh schedule of the
   494  // device is managed by an external snap
   495  func refreshScheduleManaged(st *state.State) (managed, requested, legacy bool) {
   496  	var confStr string
   497  
   498  	// this will only be "nil" if running in tests
   499  	if CanManageRefreshes == nil {
   500  		return false, false, legacy
   501  	}
   502  
   503  	// check new style timer first
   504  	tr := config.NewTransaction(st)
   505  	err := tr.Get("core", "refresh.timer", &confStr)
   506  	if err != nil && !config.IsNoOption(err) {
   507  		return false, false, legacy
   508  	}
   509  	// if not set, fallback to refresh.schedule
   510  	if confStr == "" {
   511  		if err := tr.Get("core", "refresh.schedule", &confStr); err != nil {
   512  			return false, false, legacy
   513  		}
   514  		legacy = true
   515  	}
   516  
   517  	if confStr != "managed" {
   518  		return false, false, legacy
   519  	}
   520  
   521  	return CanManageRefreshes(st), true, legacy
   522  }
   523  
   524  // getTime retrieves a time from a state value.
   525  func getTime(st *state.State, timeKey string) (time.Time, error) {
   526  	var t1 time.Time
   527  	err := st.Get(timeKey, &t1)
   528  	if err != nil && err != state.ErrNoState {
   529  		return time.Time{}, err
   530  	}
   531  	return t1, nil
   532  }
   533  
   534  // inhibitRefresh returns an error if refresh is inhibited by running apps.
   535  //
   536  // Internally the snap state is updated to remember when the inhibition first
   537  // took place. Apps can inhibit refreshes for up to "maxInhibition", beyond
   538  // that period the refresh will go ahead despite application activity.
   539  func inhibitRefresh(st *state.State, snapst *SnapState, info *snap.Info, checker func(*snap.Info) error) error {
   540  	if err := checker(info); err != nil {
   541  		days := int(maxInhibition.Truncate(time.Hour).Hours() / 24)
   542  		now := time.Now()
   543  		if snapst.RefreshInhibitedTime == nil {
   544  			// Store the instant when the snap was first inhibited.
   545  			// This is reset to nil on successful refresh.
   546  			snapst.RefreshInhibitedTime = &now
   547  			Set(st, info.InstanceName(), snapst)
   548  			if _, ok := err.(*BusySnapError); ok {
   549  				st.Warnf(i18n.NG(
   550  					"snap %q is currently in use. Its refresh will be postponed for up to %d day to wait for the snap to no longer be in use.",
   551  					"snap %q is currently in use. Its refresh will be postponed for up to %d days to wait for the snap to no longer be in use.", days),
   552  					info.SnapName(), days)
   553  			}
   554  			return err
   555  		}
   556  
   557  		if now.Sub(*snapst.RefreshInhibitedTime) < maxInhibition {
   558  			// If we are still in the allowed window then just return
   559  			// the error but don't change the snap state again.
   560  			return err
   561  		}
   562  		if _, ok := err.(*BusySnapError); ok {
   563  			st.Warnf(i18n.NG(
   564  				"snap %q has been running for the maximum allowable %d day since its refresh was postponed. It will now be refreshed.",
   565  				"snap %q has been running for the maximum allowable %d days since its refresh was postponed. It will now be refreshed.", days),
   566  				info.SnapName(), days)
   567  		}
   568  	}
   569  	return nil
   570  }