github.com/keybase/client/go@v0.0.0-20241007131713-f10651d043c8/kbfs/libkbfs/rekey_fsm.go (about)

     1  // Copyright 2017 Keybase Inc. All rights reserved.
     2  // Use of this source code is governed by a BSD
     3  // license that can be found in the LICENSE file.
     4  
     5  package libkbfs
     6  
     7  import (
     8  	"context"
     9  	"fmt"
    10  	"sync"
    11  	"time"
    12  
    13  	"github.com/keybase/client/go/kbfs/data"
    14  	"github.com/keybase/client/go/kbfs/kbfssync"
    15  	"github.com/keybase/client/go/kbfs/tlf"
    16  	"github.com/keybase/client/go/logger"
    17  )
    18  
    19  /*
    20  
    21   This file defines a finite state machine (FSM) for rekey operation scheduling.
    22   The state chart is described in following dot graph:
    23  
    24  digraph rekeyFSM {
    25    graph [rankdir=LR]
    26    start [shape=plaintext]
    27  
    28    Idle -> Idle [label="*"]
    29    Scheduled -> Scheduled [label="*"]
    30    Started -> Started [label="*"]
    31  
    32    start -> Idle
    33    Idle -> Scheduled [label=Request]
    34    Scheduled -> Scheduled [label="Request,RekeyNotNeeded"]
    35    Scheduled -> Started [label=Timeup]
    36    Started -> Scheduled [label="Finished(TTL valid && (rekey done || needs paper))"]
    37    Started -> Idle [label="Finished (*)"]
    38  }
    39  
    40  */
    41  
    42  // CtxRekeyTagKey is the type used for unique context tags within an
    43  // enqueued Rekey.
    44  type CtxRekeyTagKey int
    45  
    46  const (
    47  	// CtxRekeyIDKey is the type of the tag for unique operation IDs
    48  	// within an enqueued Rekey.
    49  	CtxRekeyIDKey CtxRekeyTagKey = iota
    50  )
    51  
    52  // CtxRekeyOpID is the display name for the unique operation
    53  // enqueued rekey ID tag.
    54  const CtxRekeyOpID = "REKEYID"
    55  
    56  type rekeyEventType int
    57  
    58  const (
    59  	_ rekeyEventType = iota
    60  	rekeyRequestEvent
    61  	rekeyFinishedEvent
    62  	rekeyTimeupEvent
    63  	rekeyNotNeededEvent
    64  	rekeyKickoffEvent
    65  
    66  	rekeyShutdownEvent
    67  
    68  	rekeyCancelEventForTest
    69  )
    70  
    71  func (e rekeyEventType) String() string {
    72  	switch e {
    73  	case rekeyRequestEvent:
    74  		return "rekeyRequestEvent"
    75  	case rekeyFinishedEvent:
    76  		return "rekeyFinishedEvent"
    77  	case rekeyTimeupEvent:
    78  		return "rekeyTimeupEvent"
    79  	case rekeyNotNeededEvent:
    80  		return "rekeyNotNeededEvent"
    81  	case rekeyShutdownEvent:
    82  		return "rekeyShutdownEvent"
    83  	case rekeyKickoffEvent:
    84  		return "rekeyKickoffEvent"
    85  	case rekeyCancelEventForTest:
    86  		return "rekeyCancelEventForTest"
    87  	default:
    88  		return "unknown"
    89  	}
    90  }
    91  
    92  // rekeyTask describes a rekey task.
    93  type rekeyTask struct {
    94  	// timeout, if non-nil, causes rekey to fail if it takes more than this
    95  	// duration since it enters rekeyStateStarted.
    96  	timeout     *time.Duration
    97  	ttl         int
    98  	promptPaper bool
    99  
   100  	ctx *protectedContext
   101  }
   102  
   103  // rekeyRequest describes a rekey request.
   104  type rekeyRequest struct {
   105  	// delay is the duration to wait for since the request enters the FSM until
   106  	// starting the rekey.
   107  	delay time.Duration
   108  	rekeyTask
   109  }
   110  
   111  // rekeyFinished describes a rekeyFinishedEvent. It contains results from an
   112  // actual rekey operation.
   113  type rekeyFinished struct {
   114  	RekeyResult
   115  	err error
   116  }
   117  
   118  // RekeyEvent describes an event to send into the RekeyFSM. A function, e.g.,
   119  // NewRekeyRequestEvent, should be used to construct one.
   120  type RekeyEvent struct {
   121  	eventType rekeyEventType
   122  	request   *rekeyRequest
   123  	finished  *rekeyFinished
   124  }
   125  
   126  func (e RekeyEvent) String() string {
   127  	switch e.eventType {
   128  	case rekeyRequestEvent:
   129  		return fmt.Sprintf("%s [%#+v]", e.eventType, e.request)
   130  	case rekeyFinishedEvent:
   131  		return fmt.Sprintf("%s [%#+v]", e.eventType, e.finished)
   132  	default:
   133  		return e.eventType.String()
   134  	}
   135  }
   136  
   137  func newRekeyRequestEvent(req rekeyRequest) RekeyEvent {
   138  	return RekeyEvent{
   139  		eventType: rekeyRequestEvent,
   140  		request:   &req,
   141  	}
   142  }
   143  
   144  func newRekeyRequestEventWithContext(ctx context.Context) RekeyEvent {
   145  	return newRekeyRequestEvent(rekeyRequest{
   146  		delay: 0,
   147  		rekeyTask: rekeyTask{
   148  			timeout:     nil,
   149  			promptPaper: false,
   150  			ttl:         rekeyInitialTTL,
   151  			ctx:         newProtectedContext(ctx, nil),
   152  		},
   153  	})
   154  }
   155  
   156  // NewRekeyRequestWithPaperPromptEvent creates a non-delayed rekey request
   157  // Event that causes a paper prompt.
   158  func NewRekeyRequestWithPaperPromptEvent() RekeyEvent {
   159  	e := NewRekeyRequestEvent()
   160  	d := rekeyWithPromptWaitTimeDefault
   161  	e.request.promptPaper = true
   162  	e.request.timeout = &d
   163  	return e
   164  }
   165  
   166  // NewRekeyRequestEvent creates a non-delayed rekey request Event.
   167  func NewRekeyRequestEvent() RekeyEvent {
   168  	return newRekeyRequestEventWithContext(CtxWithRandomIDReplayable(
   169  		context.Background(), CtxRekeyIDKey, CtxRekeyOpID, nil))
   170  }
   171  
   172  // NewRekeyNotNeededEvent creates a rekeyNotNeededEvent typed event. If the FSM
   173  // is in rekeyStateScheduled, this causes FSM to unset paperkey prompt. In
   174  // other states nothing happens. This event is sent to the FSM when we see a MD
   175  // update with rekey flag unset. It can be an indication that an old
   176  // outstanding rekey request has been served by another device, or just a
   177  // regular rekey updates.
   178  func NewRekeyNotNeededEvent() RekeyEvent {
   179  	return RekeyEvent{
   180  		eventType: rekeyNotNeededEvent,
   181  	}
   182  }
   183  
   184  func newRekeyFinishedEvent(res RekeyResult, err error) RekeyEvent {
   185  	return RekeyEvent{
   186  		eventType: rekeyFinishedEvent,
   187  		finished: &rekeyFinished{
   188  			RekeyResult: res,
   189  			err:         err,
   190  		},
   191  	}
   192  }
   193  
   194  func newRekeyTimeupEvent() RekeyEvent {
   195  	return RekeyEvent{
   196  		eventType: rekeyTimeupEvent,
   197  	}
   198  }
   199  
   200  func newRekeyShutdownEvent() RekeyEvent {
   201  	return RekeyEvent{
   202  		eventType: rekeyShutdownEvent,
   203  	}
   204  }
   205  
   206  func newRekeyKickoffEvent() RekeyEvent {
   207  	return RekeyEvent{
   208  		eventType: rekeyKickoffEvent,
   209  	}
   210  }
   211  
   212  func newRekeyCancelEventForTest() RekeyEvent {
   213  	return RekeyEvent{
   214  		eventType: rekeyCancelEventForTest,
   215  	}
   216  }
   217  
   218  // rekeyState models a state in the FSM. rekeyFSM keeps exactly one instance of
   219  // rekeyState at any given time.
   220  type rekeyState interface {
   221  	// reactToEvent defines how this state reacts to an event. Implementations of
   222  	// rekeyState should handle necessary transition actions in reactToEvent(),
   223  	// and return a new rekeyState instance after transition is finished.
   224  	// rekeyFSM sends event to the rekeyState instance it holds whenever it
   225  	// receives an event, and use the returned rekeyState instance as new state.
   226  	// It's OK to return the receiver itself as "new" state.
   227  	//
   228  	// rekeyFSM runs an event loop in a dedicated goroutine that calls
   229  	// reactToEvent and updates states. In other words, it's safe to assume
   230  	// reactToEvent is only called within the same goroutine, and that it's
   231  	// impossible that multiple reactToEvent calls are issued concurrently.
   232  	reactToEvent(event RekeyEvent) rekeyState
   233  }
   234  
   235  type rekeyStateIdle struct {
   236  	fsm *rekeyFSM
   237  }
   238  
   239  func newRekeyStateIdle(fsm *rekeyFSM) *rekeyStateIdle {
   240  	return &rekeyStateIdle{fsm: fsm}
   241  }
   242  
   243  func (r *rekeyStateIdle) reactToEvent(event RekeyEvent) rekeyState {
   244  	switch event.eventType {
   245  	case rekeyRequestEvent:
   246  		return newRekeyStateScheduled(r.fsm,
   247  			event.request.delay, event.request.rekeyTask)
   248  	default:
   249  		return r
   250  	}
   251  }
   252  
   253  type rekeyStateScheduled struct {
   254  	fsm *rekeyFSM
   255  
   256  	timer    *time.Timer
   257  	deadline time.Time
   258  
   259  	task rekeyTask
   260  }
   261  
   262  func newRekeyStateScheduled(
   263  	fsm *rekeyFSM, delay time.Duration, task rekeyTask) *rekeyStateScheduled {
   264  	task.ctx.setLogger(fsm.log)
   265  	return &rekeyStateScheduled{
   266  		fsm: fsm,
   267  		timer: time.AfterFunc(delay, func() {
   268  			fsm.Event(newRekeyTimeupEvent())
   269  		}),
   270  		deadline: time.Now().Add(delay),
   271  		task:     task,
   272  	}
   273  }
   274  
   275  func (r *rekeyStateScheduled) reactToEvent(event RekeyEvent) rekeyState {
   276  	switch event.eventType {
   277  	case rekeyTimeupEvent:
   278  		// This blocks (which inheritently blocks the entire FSM) if too many
   279  		// are active.
   280  		if r.fsm.fbo.config.GetRekeyFSMLimiter().WaitToStart(
   281  			r.task.ctx.context()) != nil {
   282  			return r
   283  		}
   284  		return newRekeyStateStarted(r.fsm, r.task)
   285  	case rekeyRequestEvent:
   286  		if r.task.promptPaper && !event.request.promptPaper {
   287  			// KBFS-2251: If fbo concludes that paper key would be needed in
   288  			// order for rekey to proceed, it writes a MD to mdserver with
   289  			// rekey set at the same time. To prevent the FSM from being kicked
   290  			// of to rekeyStateStarted right away after receiving this update
   291  			// (through FoldersNeedRekey) from mdserver, we just reuse the same
   292  			// timer if r.task.promptPaper is set.
   293  			//
   294  			// If the request has promptPaper set, then it's from the KBFS
   295  			// client, likely due to a read request. In this case, we should
   296  			// shorten the wait timer according the the request.
   297  			r.fsm.log.CDebugf(r.task.ctx.context(), "Reusing existing timer "+
   298  				"without possibly shortening due to r.task.promptPaper==true")
   299  			return r
   300  		}
   301  
   302  		task := r.task
   303  		task.promptPaper = task.promptPaper || event.request.promptPaper
   304  		if task.timeout == nil {
   305  			task.timeout = event.request.timeout
   306  		}
   307  		task.ttl = event.request.ttl
   308  		task.ctx.maybeReplaceContext(event.request.ctx.context())
   309  		if !r.deadline.After(time.Now().Add(event.request.delay)) {
   310  			r.fsm.log.CDebugf(task.ctx.context(), "Reusing existing timer")
   311  			r.task = task
   312  			return r
   313  		}
   314  		r.timer.Stop()
   315  		return newRekeyStateScheduled(r.fsm, event.request.delay, task)
   316  	case rekeyNotNeededEvent:
   317  		// KBFS-2254: if another device finished rekey, we should unset the
   318  		// paperkey prompt so that if this other device goes offline before a
   319  		// third device triggers a rekey request, the timer can be preempted.
   320  		// What if the FoldersNeedRekey call comes in before this and we still
   321  		// miss the rekey request? Well now we also send a rekey request into
   322  		// the FSM on MD updates with rekey flag set. Since the MD updates are
   323  		// applied in order, and that FSM's state transition is
   324  		// single-goroutined, we are safe here.
   325  		r.task.promptPaper = false
   326  		return r
   327  	case rekeyKickoffEvent:
   328  		r.timer.Reset(time.Millisecond)
   329  		return r
   330  	case rekeyCancelEventForTest:
   331  		r.timer.Stop()
   332  		return newRekeyStateIdle(r.fsm)
   333  	case rekeyShutdownEvent:
   334  		r.timer.Stop()
   335  		return r
   336  	default:
   337  		return r
   338  	}
   339  }
   340  
   341  type rekeyStateStarted struct {
   342  	fsm  *rekeyFSM
   343  	task rekeyTask
   344  }
   345  
   346  func newRekeyStateStarted(fsm *rekeyFSM, task rekeyTask) *rekeyStateStarted {
   347  	ctx := task.ctx.context()
   348  	var cancel context.CancelFunc
   349  	if task.timeout != nil {
   350  		ctx, cancel = context.WithTimeout(task.ctx.context(), *task.timeout)
   351  	}
   352  	go func() {
   353  		defer fsm.fbo.config.GetRekeyFSMLimiter().Done()
   354  		if cancel != nil {
   355  			defer cancel()
   356  		}
   357  		fsm.log.CDebugf(ctx, "Processing rekey for %s", fsm.fbo.folderBranch.Tlf)
   358  		var res RekeyResult
   359  		err := fsm.fbo.doMDWriteWithRetryUnlessCanceled(ctx,
   360  			func(lState *kbfssync.LockState) (err error) {
   361  				res, err = fsm.fbo.rekeyLocked(ctx, lState, task.promptPaper)
   362  				return err
   363  			})
   364  		fsm.log.CDebugf(ctx, "Rekey finished with res=%#+v, error=%v", res, err)
   365  		fsm.Event(newRekeyFinishedEvent(res, err))
   366  	}()
   367  	return &rekeyStateStarted{
   368  		fsm:  fsm,
   369  		task: task,
   370  	}
   371  }
   372  
   373  func (r *rekeyStateStarted) reactToEvent(event RekeyEvent) rekeyState {
   374  	switch event.eventType {
   375  	case rekeyFinishedEvent:
   376  		ttl := r.task.ttl - 1
   377  		r.fsm.log.CDebugf(r.task.ctx.context(),
   378  			"Rekey finished, ttl: %d -> %d", r.task.ttl, ttl)
   379  
   380  		if ttl <= 0 {
   381  			r.fsm.log.CDebugf(r.task.ctx.context(),
   382  				"Not scheduling new rekey because TTL expired")
   383  			return newRekeyStateIdle(r.fsm)
   384  		}
   385  
   386  		switch event.finished.err {
   387  		case nil:
   388  		default:
   389  			r.fsm.log.CDebugf(r.task.ctx.context(),
   390  				"Rekey errored; scheduling new rekey in %s", rekeyRecheckInterval)
   391  			return newRekeyStateScheduled(r.fsm, rekeyRecheckInterval, rekeyTask{
   392  				timeout:     r.task.timeout,
   393  				promptPaper: r.task.promptPaper,
   394  				ttl:         ttl,
   395  				ctx:         r.task.ctx,
   396  			})
   397  		}
   398  
   399  		d := r.fsm.fbo.config.RekeyWithPromptWaitTime()
   400  		if event.finished.NeedsPaperKey {
   401  			r.fsm.log.CDebugf(r.task.ctx.context(),
   402  				"Scheduling rekey due to NeedsPaperKey==true")
   403  			return newRekeyStateScheduled(r.fsm, d, rekeyTask{
   404  				timeout:     &d,
   405  				promptPaper: true,
   406  				ttl:         ttl,
   407  				ctx:         r.task.ctx,
   408  			})
   409  		}
   410  
   411  		if event.finished.DidRekey {
   412  			// We enqueue the rekey here again, in case we missed a device due to a
   413  			// race condition. This is specifically for the situation where user
   414  			// provisions two devices in a row, and the key update for the 2nd device
   415  			// only comes in after rekey for a TLF is done, which didn't include the
   416  			// second device. At this point, there wouldn't be a new MD with rekey
   417  			// bit set since it's already set. As a result, the TLF won't get rekeyed
   418  			// for the second device until the next 1-hour timer triggers another
   419  			// scan.
   420  			r.fsm.log.CDebugf(r.task.ctx.context(),
   421  				"Scheduling rekey (recheck) due to DidRekey==true")
   422  			return newRekeyStateScheduled(r.fsm, rekeyRecheckInterval, rekeyTask{
   423  				timeout:     nil,
   424  				promptPaper: false,
   425  				ttl:         ttl,
   426  				ctx:         r.task.ctx,
   427  			})
   428  		}
   429  
   430  		r.fsm.log.CDebugf(r.task.ctx.context(),
   431  			"Not scheduling rekey because no more rekeys or rechecks are needed")
   432  		return newRekeyStateIdle(r.fsm)
   433  	default:
   434  		return r
   435  	}
   436  }
   437  
   438  type rekeyFSMListener struct {
   439  	repeatedly bool
   440  	onEvent    func(RekeyEvent)
   441  }
   442  
   443  type rekeyFSM struct {
   444  	shutdownCh chan struct{}
   445  	reqs       chan RekeyEvent
   446  
   447  	fbo *folderBranchOps
   448  	log logger.Logger
   449  
   450  	current rekeyState
   451  
   452  	muListeners sync.Mutex
   453  	listeners   map[rekeyEventType][]rekeyFSMListener
   454  }
   455  
   456  // NewRekeyFSM creates a new rekey FSM.
   457  func NewRekeyFSM(fbo *folderBranchOps) RekeyFSM {
   458  	fsm := &rekeyFSM{
   459  		reqs:       make(chan RekeyEvent, fbo.config.Mode().RekeyQueueSize()),
   460  		shutdownCh: make(chan struct{}),
   461  		fbo:        fbo,
   462  		log:        fbo.config.MakeLogger("RekeyFSM"),
   463  
   464  		listeners: make(map[rekeyEventType][]rekeyFSMListener),
   465  	}
   466  	fsm.current = newRekeyStateIdle(fsm)
   467  	if fbo.bType == standard {
   468  		go fsm.loop()
   469  	}
   470  	return fsm
   471  }
   472  
   473  func (m *rekeyFSM) loop() {
   474  	reqs := m.reqs
   475  	for {
   476  		select {
   477  		case e := <-reqs:
   478  			next := m.current.reactToEvent(e)
   479  			if e.eventType == rekeyShutdownEvent {
   480  				// Set reqs to nil so on next iteration, we will skip any
   481  				// content in reqs. So if there are multiple
   482  				// rekeyShutdownEvent, we won't close m.shutdownCh multiple
   483  				// times.
   484  				reqs = nil
   485  				close(m.shutdownCh)
   486  			} else {
   487  				// Only log if we're not shutting down, otherwise `go vet`
   488  				// yells at us in tests.
   489  				m.log.Debug("RekeyFSM transition: %T + %s -> %T",
   490  					m.current, e, next)
   491  			}
   492  			m.current = next
   493  			m.triggerCallbacksForTest(e)
   494  
   495  		case <-m.shutdownCh:
   496  			return
   497  		}
   498  	}
   499  }
   500  
   501  // Event implements RekeyFSM interface for rekeyFSM.
   502  func (m *rekeyFSM) Event(event RekeyEvent) {
   503  	select {
   504  	case m.reqs <- event:
   505  	case <-m.shutdownCh:
   506  	}
   507  }
   508  
   509  // Shutdown implements RekeyFSM interface for rekeyFSM.
   510  func (m *rekeyFSM) Shutdown() {
   511  	m.Event(newRekeyShutdownEvent())
   512  }
   513  
   514  func (m *rekeyFSM) triggerCallbacksForTest(e RekeyEvent) {
   515  	var cbs []rekeyFSMListener
   516  	func() {
   517  		m.muListeners.Lock()
   518  		defer m.muListeners.Unlock()
   519  		cbs = m.listeners[e.eventType]
   520  		m.listeners[e.eventType] = nil
   521  		for _, cb := range cbs {
   522  			if cb.repeatedly {
   523  				m.listeners[e.eventType] = append(
   524  					m.listeners[e.eventType], cb)
   525  			}
   526  		}
   527  	}()
   528  	for _, cb := range cbs {
   529  		cb.onEvent(e)
   530  	}
   531  }
   532  
   533  // listenOnEvent implements RekeyFSM interface for rekeyFSM.
   534  func (m *rekeyFSM) listenOnEvent(
   535  	event rekeyEventType, callback func(RekeyEvent), repeatedly bool) {
   536  	m.muListeners.Lock()
   537  	defer m.muListeners.Unlock()
   538  	m.listeners[event] = append(m.listeners[event], rekeyFSMListener{
   539  		onEvent:    callback,
   540  		repeatedly: repeatedly,
   541  	})
   542  }
   543  
   544  func getRekeyFSM(ctx context.Context, ops KBFSOps, tlfID tlf.ID) RekeyFSM {
   545  	switch o := ops.(type) {
   546  	case *KBFSOpsStandard:
   547  		return o.getOpsNoAdd(
   548  			ctx, data.FolderBranch{
   549  				Tlf:    tlfID,
   550  				Branch: data.MasterBranch,
   551  			}).rekeyFSM
   552  	default:
   553  		panic("unknown KBFSOps")
   554  	}
   555  }
   556  
   557  // RequestRekeyAndWaitForOneFinishEvent sends a rekey request to the FSM
   558  // associated with tlfID, and wait for exact one rekeyFinished event. This can
   559  // be useful for waiting for a rekey result in tests.
   560  //
   561  // Note that the supplied ctx is injected to the rekey task, so canceling ctx
   562  // would actually cancel the rekey.
   563  //
   564  // Currently this is only used in tests and RekeyFile. Normal rekey activities
   565  // should go through the FSM asychronously.
   566  func RequestRekeyAndWaitForOneFinishEvent(ctx context.Context,
   567  	ops KBFSOps, tlfID tlf.ID) (res RekeyResult, err error) {
   568  	fsm := getRekeyFSM(ctx, ops, tlfID)
   569  	rekeyWaiter := make(chan struct{})
   570  	fsm.listenOnEvent(rekeyFinishedEvent, func(e RekeyEvent) {
   571  		res = e.finished.RekeyResult
   572  		err = e.finished.err
   573  		close(rekeyWaiter)
   574  	}, false)
   575  	fsm.Event(newRekeyRequestEventWithContext(ctx))
   576  	<-rekeyWaiter
   577  	return res, err
   578  }