github.com/keybase/client/go@v0.0.0-20240520164431-4f512a4c85a3/kbfs/libcontext/delayed_cancellation.go (about)

     1  // Copyright 2016 Keybase Inc. All rights reserved.
     2  // Use of this source code is governed by a BSD
     3  // license that can be found in the LICENSE file.
     4  
     5  package libcontext
     6  
     7  import (
     8  	"sync/atomic"
     9  	"time"
    10  
    11  	"golang.org/x/net/context"
    12  )
    13  
    14  // This file defines a set of functions for delaying context cancellations.
    15  // It's a hacky implementation and some functions require extra caution about when
    16  // they should be called.
    17  //
    18  // For KBFS, this is mainly used to cope with EINTR. Interrupts can happen
    19  // very commonly. For example, git relies on SIGALRM for periodic progress
    20  // reports. Every time SIGALRM arrives, the current I/O operation gets an
    21  // interrupt. bazil/fuse calls Cancel on context when getting an interrupt. If
    22  // we return an error on this cancellation, application gets an EINTR. However,
    23  // with a lot of remote operations filesystem state can sometimes be
    24  // unpredictable, and returning EINTR might introduce inconsistency between
    25  // application's perception of state and the real state. In addition,
    26  // applications may not be ready in all scenarios to handle EINTR. By using
    27  // delayed cancellation, these issues are mitigated. Specifically, KBFS uses
    28  // this in following situations:
    29  //
    30  // 1. In a local filesystem, some operations (e.g. Attr) are considered "fast"
    31  // operations. So unlike slow ones like Read or Create whose manuals explicitly
    32  // say EINTR can happen and needs to be handled, a "fast" operation's
    33  // documentation doesn't list EINTR as possible errors. As a result, some
    34  // applications are not ready to handle EINTR in some of filesystem calls.
    35  // Using delayed cancellation for such operations means if there's an interrupt
    36  // received in the middle of the operation, it doesn't get cancelled right
    37  // away, but instead waits for a grace period before effectively cancelling the
    38  // context. This should allow the operation to finish in most cases -- unless
    39  // the network condition is too bad, in which case we choose to let application
    40  // error instead of making things unresponsive to Ctrl-C (i.e. still cancel the
    41  // context after the grace period).
    42  //
    43  // 2. To be responsive to Ctrl-C, we are using runUnlessCanceled, which returns
    44  // immediately if the context gets canceled, even though the actual operation
    45  // routine may still be waiting on a lock or remote operations. This means
    46  // that, once we start a MD write, the filesystem state becomes unpredictable.
    47  // We enable delayed cancellation here to try to avoid context being canceled
    48  // in the middle of a MD write, also with a grace period timeout. See comments
    49  // in folder_branch_ops.go in finalizedMDWriteLocked for more.
    50  
    51  // CtxReplayKeyType is a type for the context key for CtxReplayFunc
    52  type CtxReplayKeyType int
    53  
    54  const (
    55  	// CtxReplayKey is a context key for CtxReplayFunc
    56  	CtxReplayKey CtxReplayKeyType = iota
    57  )
    58  
    59  // CtxCancellationDelayerKeyType is a type for the context key for
    60  // using cancellationDelayer
    61  type CtxCancellationDelayerKeyType int
    62  
    63  const (
    64  	// CtxCancellationDelayerKey is a context key for using cancellationDelayer
    65  	CtxCancellationDelayerKey CtxCancellationDelayerKeyType = iota
    66  )
    67  
    68  // CtxReplayFunc is a function for replaying a series of changes done on a
    69  // context.
    70  type CtxReplayFunc func(ctx context.Context) context.Context
    71  
    72  // CtxNotReplayableError is returned when NewContextWithReplayFrom is called on
    73  // a ctx with no replay func.
    74  type CtxNotReplayableError struct{}
    75  
    76  func (e CtxNotReplayableError) Error() string {
    77  	return "Unable to replay on ctx"
    78  }
    79  
    80  // NoCancellationDelayerError is returned when EnableDelayedCancellationWithGracePeriod or
    81  // ExitCritical are called on a ctx without Critical Awareness
    82  type NoCancellationDelayerError struct{}
    83  
    84  func (e NoCancellationDelayerError) Error() string {
    85  	return "Context doesn't have critical awareness or CtxCancellationDelayerKey " +
    86  		"already exists in ctx but is not of type *cancellationDelayer"
    87  }
    88  
    89  // ContextAlreadyHasCancellationDelayerError is returned when
    90  // NewContextWithCancellationDelayer is called for the second time on the same
    91  // ctx, which is not supported yet.
    92  type ContextAlreadyHasCancellationDelayerError struct{}
    93  
    94  func (e ContextAlreadyHasCancellationDelayerError) Error() string {
    95  	return "Context already has critical awareness; only one layer is supported."
    96  }
    97  
    98  // NewContextReplayable creates a new context from ctx, with change applied. It
    99  // also makes this change replayable by NewContextWithReplayFrom. When
   100  // replayed, the resulting context is replayable as well.
   101  //
   102  // It is important that all WithValue-ish mutations on ctx is done "replayably"
   103  // (with NewContextReplayable) if any delayed cancellation is used, e.g.
   104  // through EnableDelayedCancellationWithGracePeriod,
   105  func NewContextReplayable(
   106  	ctx context.Context, change CtxReplayFunc) context.Context {
   107  	ctx = change(ctx)
   108  	replays, _ := ctx.Value(CtxReplayKey).([]CtxReplayFunc)
   109  	replays = append(replays, change)
   110  	ctx = context.WithValue(ctx, CtxReplayKey, replays)
   111  	return ctx
   112  }
   113  
   114  // NewContextWithReplayFrom constructs a new context out of ctx by calling all
   115  // attached replay functions. This disconnects any existing context.CancelFunc.
   116  func NewContextWithReplayFrom(ctx context.Context) (context.Context, error) {
   117  	if replays, ok := ctx.Value(CtxReplayKey).([]CtxReplayFunc); ok {
   118  		newCtx := context.Background()
   119  		for _, replay := range replays {
   120  			newCtx = replay(newCtx)
   121  		}
   122  		replays, _ := ctx.Value(CtxReplayKey).([]CtxReplayFunc)
   123  		newCtx = context.WithValue(newCtx, CtxReplayKey, replays)
   124  		return newCtx, nil
   125  	}
   126  	return nil, CtxNotReplayableError{}
   127  }
   128  
   129  type cancellationDelayer struct {
   130  	delay    int64
   131  	canceled int64
   132  
   133  	done chan struct{}
   134  }
   135  
   136  func newCancellationDelayer() *cancellationDelayer {
   137  	return &cancellationDelayer{
   138  		done: make(chan struct{}),
   139  	}
   140  }
   141  
   142  // NewContextWithCancellationDelayer creates a new context out of ctx. All replay
   143  // functions attached to ctx are run on the new context. In addition, the
   144  // new context is made "cancellation delayable". That is, it disconnects the cancelFunc
   145  // from ctx, and watch for the cancellation. When cancellation happens, it
   146  // checks if delayed cancellation is enabled for the associated context. If so,
   147  // it waits until it's disabled before cancelling the new context. This
   148  // provides a hacky way to allow finer control over cancellation.
   149  //
   150  // Note that, it's important to call context.WithCancel (or its friends) before
   151  // this function if those cancellations need to be controllable ("cancellation
   152  // delayable"). Otherwise, the new cancelFunc is inherently NOT ("cancellation
   153  // delayable").
   154  //
   155  // If this function is called, it is caller's responsibility to either 1)
   156  // cancel ctx (the context passed in); or 2) call CleanupCancellationDelayer;
   157  // when operations associated with the context is done. Otherwise it leaks go
   158  // routines!
   159  func NewContextWithCancellationDelayer(
   160  	ctx context.Context) (newCtx context.Context, err error) {
   161  	v := ctx.Value(CtxCancellationDelayerKey)
   162  	if v != nil {
   163  		if _, ok := v.(*cancellationDelayer); ok {
   164  			return nil, ContextAlreadyHasCancellationDelayerError{}
   165  		}
   166  		return nil, NoCancellationDelayerError{}
   167  	}
   168  
   169  	if newCtx, err = NewContextWithReplayFrom(ctx); err != nil {
   170  		return nil, err
   171  	}
   172  	c := newCancellationDelayer()
   173  	newCtx = NewContextReplayable(newCtx,
   174  		func(ctx context.Context) context.Context {
   175  			return context.WithValue(ctx, CtxCancellationDelayerKey, c)
   176  		})
   177  	newCtx, cancel := context.WithCancel(newCtx)
   178  	go func() {
   179  		select {
   180  		case <-ctx.Done():
   181  		case <-c.done:
   182  		}
   183  		d := time.Duration(atomic.LoadInt64(&c.delay))
   184  		if d != 0 {
   185  			time.Sleep(d)
   186  		}
   187  		atomic.StoreInt64(&c.canceled, 1)
   188  		cancel()
   189  	}()
   190  	return newCtx, nil
   191  }
   192  
   193  // EnableDelayedCancellationWithGracePeriod can be called on a "cancellation
   194  // delayable" context produced by NewContextWithCancellationDelayer, to enable
   195  // delayed cancellation for ctx. This is useful to indicate that the
   196  // operation(s) associated with the context has entered a critical state, and
   197  // it should not be canceled until after timeout or CleanupCancellationDelayer
   198  // is called.
   199  //
   200  // Note that if EnableDelayedCancellationWithGracePeriod is called for the
   201  // second time, and the grace period has started due to a cancellation, the
   202  // grace period would not be extended (i.e. timeout has no effect in this
   203  // case). Although in this case, no error is returned, since the delayed
   204  // cancellation is already enabled.
   205  func EnableDelayedCancellationWithGracePeriod(ctx context.Context, timeout time.Duration) error {
   206  	if c, ok := ctx.Value(CtxCancellationDelayerKey).(*cancellationDelayer); ok {
   207  		if atomic.LoadInt64(&c.canceled) > 0 {
   208  			// Too late! The parent context is already canceled and timer has already
   209  			// started.
   210  			return context.Canceled
   211  		}
   212  		atomic.StoreInt64(&c.delay, int64(timeout))
   213  		return nil
   214  	}
   215  	return NoCancellationDelayerError{}
   216  }
   217  
   218  // CleanupCancellationDelayer cleans up a context (ctx) that is cancellation
   219  // delayable and makes the go routine spawned in
   220  // NewContextWithCancellationDelayer exit. As part of the cleanup, this also
   221  // causes the cancellation delayable context to be canceled, no matter whether
   222  // the timeout passed into the EnableDelayedCancellationWithGracePeriod has
   223  // passed or not.
   224  //
   225  // Ideally, the parent ctx's cancelFunc is always called upon completion of
   226  // handling a request, in which case this wouldn't be necessary.
   227  func CleanupCancellationDelayer(ctx context.Context) error {
   228  	if c, ok := ctx.Value(CtxCancellationDelayerKey).(*cancellationDelayer); ok {
   229  		close(c.done)
   230  		return nil
   231  	}
   232  	return NoCancellationDelayerError{}
   233  }
   234  
   235  // BackgroundContextWithCancellationDelayer generate a "Background"
   236  // context that is cancellation delayable
   237  func BackgroundContextWithCancellationDelayer() context.Context {
   238  	if ctx, err := NewContextWithCancellationDelayer(NewContextReplayable(
   239  		context.Background(), func(c context.Context) context.Context {
   240  			return c
   241  		})); err != nil {
   242  		panic(err)
   243  	} else {
   244  		return ctx
   245  	}
   246  }