// Copyright 2019 The LUCI Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package dispatcher

import (
	"context"

	"golang.org/x/time/rate"

	"go.chromium.org/luci/common/clock"
	"go.chromium.org/luci/common/logging"
	"go.chromium.org/luci/common/retry/transient"
	"go.chromium.org/luci/common/sync/dispatcher/buffer"
)

// ErrorFn is called to handle the error from SendFn.
//
// This is also invoked with buffer.ErrItemTooLarge if your supplied
// ItemSizeFunc returns a size larger than Buffer.BatchSizeMax (i.e. you pushed
// an item which couldn't fit inside a Batch). Similarly, if your
// ItemSizeFunc returns <=0, this is invoked with buffer.ErrItemTooSmall.
// Channel ignores the `retry` return value of this function in these cases.
//
// It executes in the main handler loop of the dispatcher so it can make
// synchronous decisions about the dispatcher state.
//
// Blocking in this function will block ALL dispatcher actions, so be quick
// :).
//
// DO NOT WRITE TO THE CHANNEL DIRECTLY FROM THIS FUNCTION. Doing so will very
// likely cause deadlocks.
//
// This may:
//   - inspect/log the error
//   - manipulate the contents of failedBatch
//   - return a boolean of whether this Batch should be retried or not. If
//     this is false then the Batch is dropped. If it's true, then it will be
//     re-queued as-is for transmission according to Buffer.FullBehavior.
//   - pass the Batch.Data to another goroutine (in a non-blocking way!) to be
//     re-queued through Channel.C.
//
// Args:
//   - failedBatch - The Batch for which SendFn produced a non-nil error.
//   - err - The error SendFn produced.
//
// Returns true iff the dispatcher should re-try sending this Batch, according
// to Buffer.Retry.
type ErrorFn func(failedBatch *buffer.Batch, err error) (retry bool)
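
// The following sketch is illustrative only and is not part of the library
// API: a custom ErrorFn that retries transient failures and hands
// permanently-failed Batches to a caller-owned `deadLetter` channel (a
// hypothetical name) without blocking the dispatcher loop.
func exampleErrorFn(ctx context.Context, deadLetter chan<- *buffer.Batch) ErrorFn {
	return func(failedBatch *buffer.Batch, err error) (retry bool) {
		if transient.Tag.In(err) {
			return true // re-queue the Batch as-is
		}
		// Permanent failure: hand the Batch off without blocking. The
		// dead-letter consumer is assumed to re-queue the data via Channel.C
		// or record it elsewhere.
		select {
		case deadLetter <- failedBatch:
		default:
			logging.Errorf(ctx, "dead-letter channel full; dropping %d items", len(failedBatch.Data))
		}
		return false // drop the Batch from the dispatcher
	}
}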

// Options is the configuration for NewChannel.
//
// An illustrative sketch of how these fields can be combined follows the
// struct definition.
type Options struct {
	// [OPTIONAL] The ErrorFn to use (see ErrorFn docs for details).
	//
	// Default: Logs the error (at Info for retryable errors, and Error for
	// non-retryable errors) and returns true on a transient error.
	ErrorFn ErrorFn

	// [OPTIONAL] Called with the dropped batch any time the Channel drops a batch.
	//
	// This includes:
	//   * When FullBehavior==DropOldestBatch and we get new data.
	//   * When FullBehavior==DropOldestBatch and we attempt to retry old data.
	//   * When ErrorFn returns false for a batch.
	//
	// It executes in the main handler loop of the dispatcher so it can make
	// synchronous decisions about the dispatcher state.
	//
	// Blocking in this function will block ALL dispatcher actions, so be quick
	// :).
	//
	// DO NOT WRITE TO THE CHANNEL DIRECTLY FROM THIS FUNCTION. Doing so will very
	// likely cause deadlocks.
	//
	// When the channel is fully drained, this will be invoked exactly once with
	// `(nil, true)`. This will occur immediately before the DrainedFn is called.
	// Some drop functions buffer their information, and this gives them an
	// opportunity to flush out any buffered data.
	//
	// Default: logs (at Info level if FullBehavior==DropOldestBatch, or Warning
	// level otherwise) the number of data items in the Batch being dropped.
	DropFn func(b *buffer.Batch, flush bool)

	// [OPTIONAL] Called exactly once when the associated Channel is closed and
	// has fully drained its buffer, but before DrainC is closed.
	//
	// Note that this takes effect whether the Channel is shut down via Context
	// cancellation or explicitly by closing Channel.C.
	//
	// This is useful for performing final state synchronization tasks, metrics
	// finalization, helpful "everything is done!" messages, etc. without having
	// to poll the Channel to see if it's done and also maintain external
	// synchronization around the finalization action.
	//
	// Called in the main handler loop, but it's called after all other work is
	// done by the Channel, so the only thing it blocks is the closure of DrainC.
	//
	// Default: No action.
	DrainedFn func()

	// [OPTIONAL] A rate limiter for how frequently this will invoke SendFn.
	//
	// Default: No limit.
	QPSLimit *rate.Limiter

	// [OPTIONAL] The minimum frequency of invoking SendFn.
	//
	// If greater than zero, this Channel will invoke SendFn at least this often.
	// If there's a period of time longer than this with no work items, Channel
	// will invoke SendFn with a nil batch.
	//
	// Errors returned from these nil SendFn invocations are still processed
	// normally, and nil batches still count against QPSLimit.
	//
	// It is an error to specify a MinQPS value which is:
	//   * greater than QPSLimit.Limit(), or
	//   * rate.Inf.
	//
	// Default: No minimum QPS; no nil batches will be sent.
	MinQPS rate.Limit

	// [OPTIONAL]
	// Should return the size of the given buffer item (i.e. what you push into
	// Channel.C) in whatever units you like (see Buffer.BatchSizeMax).
	//
	// The function will only ever be called once per pushed item.
	//
	// [REQUIRED]
	// Must be non-nil if Buffer.BatchSizeMax is specified.
	//
	// Must return a positive value no larger than Buffer.BatchSizeMax. Failure
	// to do so will cause `itm` to be immediately rejected by the
	// dispatcher.Channel and routed to ErrorFn with no further processing.
	ItemSizeFunc func(itm any) int

	// Options for the underlying Buffer. See buffer.Options for defaults.
	Buffer buffer.Options

	// Debug output for tests.
	testingDbg func(string, ...any)
}
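
// The following sketch is illustrative only and is not part of the library
// API: one way the fields above could be combined. The specific numbers, the
// []byte item type assumed by ItemSizeFunc, and the zero-value
// DropOldestBatch are arbitrary choices for the example, not recommendations.
func exampleOptions(ctx context.Context) Options {
	return Options{
		// Invoke SendFn at most 5 times per second (bursts of up to 2)...
		QPSLimit: rate.NewLimiter(5, 2),
		// ...but at least once every 10 seconds, even if that means a nil
		// Batch (note 0.1 <= QPSLimit.Limit(), as MinQPS requires).
		MinQPS: rate.Limit(0.1),

		// Assume pushed items are []byte and cap a Batch at ~1MiB of them.
		ItemSizeFunc: func(itm any) int { return len(itm.([]byte)) },
		Buffer: buffer.Options{
			BatchSizeMax: 1024 * 1024,
			FullBehavior: &buffer.DropOldestBatch{},
		},

		// Summarize drops at most once every 10 seconds instead of logging
		// each dropped Batch individually.
		DropFn: DropFnSummarized(ctx, rate.NewLimiter(0.1, 1)),
	}
}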

// defaultDropFnFactory returns the default Options.DropFn: it logs the size of
// each dropped Batch, at Info level when drops are expected
// (FullBehavior==DropOldestBatch) and at Warning level otherwise.
func defaultDropFnFactory(ctx context.Context, fullBehavior buffer.FullBehavior) func(*buffer.Batch, bool) {
	return func(dropped *buffer.Batch, flush bool) {
		if flush {
			return
		}
		logFn := logging.Warningf
		if _, ok := fullBehavior.(*buffer.DropOldestBatch); ok {
			logFn = logging.Infof
		}
		logFn(
			ctx,
			"dropping Batch(len(Data): %d, Meta: %+v)",
			len(dropped.Data), dropped.Meta)
	}
}

// defaultErrorFnFactory returns the default Options.ErrorFn: it logs the
// failed Batch (at Info level for transient errors, Error level otherwise)
// and retries iff the error is tagged transient.
func defaultErrorFnFactory(ctx context.Context) ErrorFn {
	return func(failedBatch *buffer.Batch, err error) (retry bool) {
		retry = transient.Tag.In(err)
		logFn := logging.Errorf
		if retry {
			logFn = logging.Infof
		}
		logFn(
			ctx,
			"failed to send Batch(len(Data): %d, Meta: %+v): %s",
			len(failedBatch.Data), failedBatch.Meta, err)

		return
	}
}

// ErrorFnQuiet is an implementation of Options.ErrorFn which doesn't log the
// batch, but does check for `transient.Tag` to determine `retry`.
func ErrorFnQuiet(b *buffer.Batch, err error) (retry bool) {
	return transient.Tag.In(err)
}

// ErrorFnReport is an implementation of Options.ErrorFn which sends all errors
// to a buffered channel. The channel MUST be drained as quickly as possible;
// otherwise, it may block all dispatcher actions.
//
// If an `inner` error function is provided, it is used to determine `retry`.
// Otherwise, `retry` is always false.
func ErrorFnReport(bufferSize int, inner ErrorFn) (ErrorFn, <-chan error) {
	errCh := make(chan error, bufferSize)
	return func(b *buffer.Batch, err error) bool {
		errCh <- err
		if inner != nil {
			return inner(b, err)
		}
		return false
	}, errCh
}
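
// The following sketch is illustrative only and is not part of the library
// API: wiring ErrorFnReport into Options while keeping the returned channel
// drained so it never backs up into the dispatcher loop. The buffer size of
// 64 is an arbitrary assumption; the channel is never closed by the
// dispatcher, so the draining goroutine exits on context cancellation.
func exampleErrorFnReport(ctx context.Context) Options {
	errorFn, errCh := ErrorFnReport(64, ErrorFnQuiet)
	go func() {
		for {
			select {
			case err := <-errCh:
				logging.Warningf(ctx, "dispatcher send failed: %s", err)
			case <-ctx.Done():
				return
			}
		}
	}()
	return Options{ErrorFn: errorFn}
}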

// DropFnQuiet is an implementation of Options.DropFn which drops batches
// without logging anything.
func DropFnQuiet(*buffer.Batch, bool) {}

// DropFnSummarized returns an implementation of Options.DropFn which counts
// the number of dropped items, and only reports the running total at the rate
// allowed by `lim`.
//
// Unlike the default log function, this only logs the number of dropped items
// and the duration that they were collected over.
func DropFnSummarized(ctx context.Context, lim *rate.Limiter) func(*buffer.Batch, bool) {
	durationStart := clock.Now(ctx)
	dropCount := 0
	return func(b *buffer.Batch, flush bool) {
		dataLen := 0
		if b != nil {
			dataLen = len(b.Data)
		}
		if lim.Allow() || flush {
			now := clock.Now(ctx)
			logging.Infof(
				ctx, "dropped %d items over %s", dropCount+dataLen, now.Sub(durationStart))
			durationStart = now
			dropCount = 0
		} else {
			dropCount += dataLen
		}
	}
}
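
// The following sketch is illustrative only and is not part of the library
// API: a DropFn in the same spirit as DropFnSummarized that accumulates a
// count and relies on the final `(nil, true)` flush call (described in
// Options.DropFn) to emit a single summary line once the Channel drains.
func exampleCountingDropFn(ctx context.Context) func(*buffer.Batch, bool) {
	total := 0
	return func(b *buffer.Batch, flush bool) {
		if b != nil {
			total += len(b.Data)
		}
		if flush {
			logging.Infof(ctx, "dropped %d items in total", total)
		}
	}
}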