github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/kernel/timekeeper.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package kernel
    16  
    17  import (
    18  	"fmt"
    19  	"time"
    20  
    21  	"github.com/metacubex/gvisor/pkg/atomicbitops"
    22  	"github.com/metacubex/gvisor/pkg/log"
    23  	ktime "github.com/metacubex/gvisor/pkg/sentry/kernel/time"
    24  	"github.com/metacubex/gvisor/pkg/sentry/memmap"
    25  	"github.com/metacubex/gvisor/pkg/sentry/pgalloc"
    26  	sentrytime "github.com/metacubex/gvisor/pkg/sentry/time"
    27  	"github.com/metacubex/gvisor/pkg/sync"
    28  	"github.com/metacubex/gvisor/pkg/tcpip"
    29  )
    30  
// Timekeeper manages all of the kernel clocks.
//
// It reads time from the clock sources installed by SetClocks, applies an
// offset to the monotonic clock, and runs an update goroutine that
// periodically writes fresh clock parameters to the VDSO parameter page.
//
// +stateify savable
type Timekeeper struct {
	// clocks are the clock sources.
	//
	// These are not saved directly, as the new machine's clock may behave
	// differently.
	//
	// It is set only once, by SetClocks.
	clocks sentrytime.Clocks `state:"nosave"`

	// realtimeClock is a ktime.Clock based on timekeeper's Realtime.
	realtimeClock *timekeeperClock

	// monotonicClock is a ktime.Clock based on timekeeper's Monotonic.
	monotonicClock *timekeeperClock

	// bootTime is the realtime when the system "booted". i.e., when
	// SetClocks was called in the initial (not restored) run.
	bootTime ktime.Time

	// monotonicOffset is the offset to apply to the monotonic clock output
	// from clocks.
	//
	// It is added both to values returned by GetTime and to the monotonic
	// base reference published to the VDSO parameter page.
	//
	// It is set only once, by SetClocks.
	monotonicOffset int64 `state:"nosave"`

	// monotonicLowerBound is the lowerBound for monotonic time.
	//
	// GetTime never returns a monotonic value below it, and raises it to
	// each new monotonic reading with a compare-and-swap.
	monotonicLowerBound atomicbitops.Int64 `state:"nosave"`

	// restored, if non-nil, indicates that this Timekeeper was restored
	// from a state file. The clocks are not set until restored is closed.
	//
	// SetClocks closes it once the clocks are installed; GetTime blocks on
	// it when called before that point.
	restored chan struct{} `state:"nosave"`

	// saveMonotonic is the (offset) value of the monotonic clock at the
	// time of save.
	//
	// It is only valid if restored is non-nil.
	//
	// It is only used in SetClocks after restore to compute the new
	// monotonicOffset.
	saveMonotonic int64

	// saveRealtime is the value of the realtime clock at the time of save.
	//
	// It is only valid if restored is non-nil.
	//
	// It is only used in SetClocks after restore to compute the new
	// monotonicOffset.
	saveRealtime int64

	// params manages the parameter page.
	params *VDSOParamPage

	// mu protects destruction with stop and wg.
	//
	// startUpdater and stopUpdater require it to be held.
	mu sync.Mutex `state:"nosave"`

	// stop is used to tell the update goroutine to exit.
	//
	// It is non-nil exactly while the updater is considered running:
	// startUpdater creates it and stopUpdater closes it and resets it to
	// nil.
	stop chan struct{} `state:"nosave"`

	// wg is used to indicate that the update goroutine has exited.
	wg sync.WaitGroup `state:"nosave"`
}
    95  
    96  // NewTimekeeper returns a Timekeeper that is automatically kept up-to-date.
    97  // NewTimekeeper does not take ownership of paramPage.
    98  //
    99  // SetClocks must be called on the returned Timekeeper before it is usable.
   100  func NewTimekeeper(mf *pgalloc.MemoryFile, paramPage memmap.FileRange) *Timekeeper {
   101  	t := Timekeeper{
   102  		params: NewVDSOParamPage(mf, paramPage),
   103  	}
   104  	t.realtimeClock = &timekeeperClock{tk: &t, c: sentrytime.Realtime}
   105  	t.monotonicClock = &timekeeperClock{tk: &t, c: sentrytime.Monotonic}
   106  	return &t
   107  }
   108  
   109  // SetClocks the backing clock source.
   110  //
   111  // SetClocks must be called before the Timekeeper is used, and it may not be
   112  // called more than once, as changing the clock source without extra correction
   113  // could cause time discontinuities.
   114  //
   115  // It must also be called after Load.
   116  func (t *Timekeeper) SetClocks(c sentrytime.Clocks) {
   117  	// Update the params, marking them "not ready", as we may need to
   118  	// restart calibration on this new machine.
   119  	if t.restored != nil {
   120  		if err := t.params.Write(func() vdsoParams {
   121  			return vdsoParams{}
   122  		}); err != nil {
   123  			panic("unable to reset VDSO params: " + err.Error())
   124  		}
   125  	}
   126  
   127  	if t.clocks != nil {
   128  		panic("SetClocks called on previously-initialized Timekeeper")
   129  	}
   130  
   131  	t.clocks = c
   132  
   133  	// Compute the offset of the monotonic clock from the base Clocks.
   134  	//
   135  	// In a fresh (not restored) sentry, monotonic time starts at zero.
   136  	//
   137  	// In a restored sentry, monotonic time jumps forward by approximately
   138  	// the same amount as real time. There are no guarantees here, we are
   139  	// just making a best-effort attempt to make it appear that the app
   140  	// was simply not scheduled for a long period, rather than that the
   141  	// real time clock was changed.
   142  	//
   143  	// If real time went backwards, it remains the same.
   144  	wantMonotonic := int64(0)
   145  
   146  	nowMonotonic, err := t.clocks.GetTime(sentrytime.Monotonic)
   147  	if err != nil {
   148  		panic("Unable to get current monotonic time: " + err.Error())
   149  	}
   150  
   151  	nowRealtime, err := t.clocks.GetTime(sentrytime.Realtime)
   152  	if err != nil {
   153  		panic("Unable to get current realtime: " + err.Error())
   154  	}
   155  
   156  	if t.restored != nil {
   157  		wantMonotonic = t.saveMonotonic
   158  		elapsed := nowRealtime - t.saveRealtime
   159  		if elapsed > 0 {
   160  			wantMonotonic += elapsed
   161  		}
   162  	}
   163  
   164  	t.monotonicOffset = wantMonotonic - nowMonotonic
   165  
   166  	if t.restored == nil {
   167  		// Hold on to the initial "boot" time.
   168  		t.bootTime = ktime.FromNanoseconds(nowRealtime)
   169  	}
   170  
   171  	t.mu.Lock()
   172  	defer t.mu.Unlock()
   173  	t.startUpdater()
   174  
   175  	if t.restored != nil {
   176  		close(t.restored)
   177  	}
   178  }
   179  
// Compile-time assertion that *Timekeeper implements tcpip.Clock.
var _ tcpip.Clock = (*Timekeeper)(nil)
   181  
   182  // Now implements tcpip.Clock.
   183  func (t *Timekeeper) Now() time.Time {
   184  	nsec, err := t.GetTime(sentrytime.Realtime)
   185  	if err != nil {
   186  		panic("timekeeper.GetTime(sentrytime.Realtime): " + err.Error())
   187  	}
   188  	return time.Unix(0, nsec)
   189  }
   190  
   191  // NowMonotonic implements tcpip.Clock.
   192  func (t *Timekeeper) NowMonotonic() tcpip.MonotonicTime {
   193  	nsec, err := t.GetTime(sentrytime.Monotonic)
   194  	if err != nil {
   195  		panic("timekeeper.GetTime(sentrytime.Monotonic): " + err.Error())
   196  	}
   197  	var mt tcpip.MonotonicTime
   198  	return mt.Add(time.Duration(nsec) * time.Nanosecond)
   199  }
   200  
// AfterFunc implements tcpip.Clock.
//
// It hands d and f to ktime.AfterFunc using the Timekeeper's realtime clock
// and returns the resulting timer.
func (t *Timekeeper) AfterFunc(d time.Duration, f func()) tcpip.Timer {
	return ktime.AfterFunc(t.realtimeClock, d, f)
}
   205  
   206  // startUpdater starts an update goroutine that keeps the clocks updated.
   207  //
   208  // mu must be held.
   209  func (t *Timekeeper) startUpdater() {
   210  	if t.stop != nil {
   211  		// Timekeeper already started
   212  		return
   213  	}
   214  	t.stop = make(chan struct{})
   215  
   216  	// Keep the clocks up to date.
   217  	//
   218  	// Note that the Go runtime uses host CLOCK_MONOTONIC to service the
   219  	// timer, so it may run at a *slightly* different rate from the
   220  	// application CLOCK_MONOTONIC. That is fine, as we only need to update
   221  	// at approximately this rate.
   222  	timer := time.NewTicker(sentrytime.ApproxUpdateInterval)
   223  	t.wg.Add(1)
   224  	go func() { // S/R-SAFE: stopped during save.
   225  		defer t.wg.Done()
   226  		for {
   227  			// Start with an update immediately, so the clocks are
   228  			// ready ASAP.
   229  
   230  			// Call Update within a Write block to prevent the VDSO
   231  			// from using the old params between Update and
   232  			// Write.
   233  			if err := t.params.Write(func() vdsoParams {
   234  				monotonicParams, monotonicOk, realtimeParams, realtimeOk := t.clocks.Update()
   235  
   236  				var p vdsoParams
   237  				if monotonicOk {
   238  					p.monotonicReady = 1
   239  					p.monotonicBaseCycles = int64(monotonicParams.BaseCycles)
   240  					p.monotonicBaseRef = int64(monotonicParams.BaseRef) + t.monotonicOffset
   241  					p.monotonicFrequency = monotonicParams.Frequency
   242  				}
   243  				if realtimeOk {
   244  					p.realtimeReady = 1
   245  					p.realtimeBaseCycles = int64(realtimeParams.BaseCycles)
   246  					p.realtimeBaseRef = int64(realtimeParams.BaseRef)
   247  					p.realtimeFrequency = realtimeParams.Frequency
   248  				}
   249  				return p
   250  			}); err != nil {
   251  				log.Warningf("Unable to update VDSO parameter page: %v", err)
   252  			}
   253  
   254  			select {
   255  			case <-timer.C:
   256  			case <-t.stop:
   257  				return
   258  			}
   259  		}
   260  	}()
   261  }
   262  
   263  // stopUpdater stops the update goroutine, blocking until it exits.
   264  //
   265  // mu must be held.
   266  func (t *Timekeeper) stopUpdater() {
   267  	if t.stop == nil {
   268  		// Updater not running.
   269  		return
   270  	}
   271  
   272  	close(t.stop)
   273  	t.wg.Wait()
   274  	t.stop = nil
   275  }
   276  
   277  // Destroy destroys the Timekeeper, freeing all associated resources.
   278  func (t *Timekeeper) Destroy() {
   279  	t.mu.Lock()
   280  	defer t.mu.Unlock()
   281  
   282  	t.stopUpdater()
   283  }
   284  
// PauseUpdates stops clock parameter updates. This should only be used when
// Tasks are not running and thus cannot access the clock.
//
// It takes mu and blocks until the update goroutine has exited. It is a
// no-op if the updater is not running.
func (t *Timekeeper) PauseUpdates() {
	t.mu.Lock()
	defer t.mu.Unlock()
	t.stopUpdater()
}
   292  
// ResumeUpdates restarts clock parameter updates stopped by PauseUpdates.
//
// It takes mu and is a no-op if the updater is already running.
//
// NOTE(review): the updater calls t.clocks.Update, so this presumably must
// not be called before SetClocks has installed the clocks; confirm against
// callers.
func (t *Timekeeper) ResumeUpdates() {
	t.mu.Lock()
	defer t.mu.Unlock()
	t.startUpdater()
}
   299  
   300  // GetTime returns the current time in nanoseconds.
   301  func (t *Timekeeper) GetTime(c sentrytime.ClockID) (int64, error) {
   302  	if t.clocks == nil {
   303  		if t.restored == nil {
   304  			panic("Timekeeper used before initialized with SetClocks")
   305  		}
   306  		<-t.restored
   307  	}
   308  	now, err := t.clocks.GetTime(c)
   309  	if err == nil && c == sentrytime.Monotonic {
   310  		now += t.monotonicOffset
   311  		for {
   312  			// It's possible that the clock is shaky. This may be due to
   313  			// platform issues, e.g. the KVM platform relies on the guest
   314  			// TSC and host TSC, which may not be perfectly in sync. To
   315  			// work around this issue, ensure that the monotonic time is
   316  			// always bounded by the last time read.
   317  			oldLowerBound := t.monotonicLowerBound.Load()
   318  			if now < oldLowerBound {
   319  				now = oldLowerBound
   320  				break
   321  			}
   322  			if t.monotonicLowerBound.CompareAndSwap(oldLowerBound, now) {
   323  				break
   324  			}
   325  		}
   326  	}
   327  	return now, err
   328  }
   329  
// BootTime returns the system boot real time.
//
// This is the realtime reading recorded by SetClocks on a Timekeeper that
// was not restored from a state file.
func (t *Timekeeper) BootTime() ktime.Time {
	return t.bootTime
}
   334  
// timekeeperClock is a ktime.Clock that reads time from a
// kernel.Timekeeper-managed clock.
//
// +stateify savable
type timekeeperClock struct {
	// tk is the Timekeeper that time is read from.
	tk *Timekeeper
	// c selects which of tk's clocks is read; it is passed to tk.GetTime.
	c sentrytime.ClockID

	// Implements ktime.Clock.WallTimeUntil.
	ktime.WallRateClock `state:"nosave"`

	// Implements waiter.Waitable. (We have no ability to detect
	// discontinuities from external changes to CLOCK_REALTIME).
	ktime.NoClockEvents `state:"nosave"`
}
   350  
   351  // Now implements ktime.Clock.Now.
   352  func (tc *timekeeperClock) Now() ktime.Time {
   353  	now, err := tc.tk.GetTime(tc.c)
   354  	if err != nil {
   355  		panic(fmt.Sprintf("timekeeperClock(ClockID=%v)).Now: %v", tc.c, err))
   356  	}
   357  	return ktime.FromNanoseconds(now)
   358  }