github.com/blend/go-sdk@v1.20220411.3/autoflush/buffer.go

     1  /*
     2  
     3  Copyright (c) 2022 - Present. Blend Labs, Inc. All rights reserved
     4  Use of this source code is governed by an MIT license that can be found in the LICENSE file.
     5  
     6  */
     7  
     8  package autoflush
     9  
    10  import (
    11  	"context"
    12  	"fmt"
    13  	"runtime"
    14  	"sync"
    15  	"time"
    16  
    17  	"github.com/blend/go-sdk/async"
    18  	"github.com/blend/go-sdk/collections"
    19  	"github.com/blend/go-sdk/ex"
    20  	"github.com/blend/go-sdk/logger"
    21  	"github.com/blend/go-sdk/stats"
    22  	"github.com/blend/go-sdk/timeutil"
    23  )
    24  
     25  // New creates a new auto-flush buffer.
    26  func New(handler Action, options ...Option) *Buffer {
    27  	afb := Buffer{
    28  		Latch:               async.NewLatch(),
    29  		Handler:             handler,
    30  		Parallelism:         runtime.NumCPU(),
    31  		MaxFlushes:          DefaultMaxFlushes,
    32  		MaxLen:              DefaultMaxLen,
    33  		Interval:            DefaultFlushInterval,
    34  		ShutdownGracePeriod: DefaultShutdownGracePeriod,
    35  	}
    36  	for _, option := range options {
    37  		option(&afb)
    38  	}
    39  	afb.contents = collections.NewRingBufferWithCapacity(afb.MaxLen)
    40  	return &afb
    41  }
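
// Illustrative usage (editor's sketch, not part of the original source): constructing a
// buffer with a hypothetical batch handler and a few of the options below. The handler
// signature matches the Action type defined further down; shipBatch is a made-up name.
//
//	buf := autoflush.New(
//		func(ctx context.Context, contents []interface{}) error {
//			return shipBatch(ctx, contents) // hypothetical sink for a drained batch
//		},
//		autoflush.OptMaxLen(512),
//		autoflush.OptInterval(500*time.Millisecond),
//		autoflush.OptParallelism(4),
//	)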
    42  
    43  // Option is an option for auto-flush buffers.
    44  type Option func(*Buffer)
    45  
    46  // OptMaxFlushes sets the auto-flush buffer's maximum flush queue length.
    47  func OptMaxFlushes(maxFlushes int) Option {
    48  	return func(afb *Buffer) {
    49  		afb.MaxFlushes = maxFlushes
    50  	}
    51  }
    52  
    53  // OptParallelism sets the auto-flush buffer's flush worker count.
    54  func OptParallelism(parallelism int) Option {
    55  	return func(afb *Buffer) {
    56  		afb.Parallelism = parallelism
    57  	}
    58  }
    59  
    60  // OptMaxLen sets the auto-flush buffer's maximum length.
    61  func OptMaxLen(maxLen int) Option {
    62  	return func(afb *Buffer) {
    63  		afb.MaxLen = maxLen
    64  	}
    65  }
    66  
    67  // OptInterval sets the auto-flush buffer's interval.
    68  func OptInterval(d time.Duration) Option {
    69  	return func(afb *Buffer) {
    70  		afb.Interval = d
    71  	}
    72  }
    73  
    74  // OptContext sets the auto-flush buffer's context.
    75  func OptContext(ctx context.Context) Option {
    76  	return func(afb *Buffer) {
    77  		afb.Context = ctx
    78  	}
    79  }
    80  
    81  // OptErrors sets the auto-flush buffer's error return channel.
    82  func OptErrors(errors chan error) Option {
    83  	return func(afb *Buffer) {
    84  		afb.Errors = errors
    85  	}
    86  }
    87  
    88  // OptShutdownGracePeriod sets the auto-flush buffer's shutdown grace period.
    89  func OptShutdownGracePeriod(shutdownGracePeriod time.Duration) Option {
    90  	return func(afb *Buffer) {
    91  		afb.ShutdownGracePeriod = shutdownGracePeriod
    92  	}
    93  }
    94  
    95  // OptLog sets the Buffer logger.
    96  func OptLog(log logger.Log) Option {
    97  	return func(afb *Buffer) {
    98  		afb.Log = log
    99  	}
   100  }
   101  
   102  // OptStats sets the Buffer stats collector.
   103  func OptStats(stats stats.Collector) Option {
   104  	return func(afb *Buffer) {
   105  		afb.Stats = stats
   106  	}
   107  }
   108  
    110  // OptTracer sets the Buffer tracer.
   110  func OptTracer(tracer Tracer) Option {
   111  	return func(afb *Buffer) {
   112  		afb.Tracer = tracer
   113  	}
   114  }
   115  
    116  // Action is an action called by an auto-flush buffer.
   117  type Action func(context.Context, []interface{}) error
   118  
    119  // Buffer is a backing store that flushes either when it reaches a fixed length or on a fixed interval.
    120  // A handler should be provided; without one the buffer simply clears itself.
    121  // Adds that trigger a fixed-length flush do not block on the flush handler.
   122  type Buffer struct {
   123  	Latch   *async.Latch
   124  	Context context.Context
   125  
   126  	Log    logger.Log
   127  	Stats  stats.Collector
   128  	Tracer Tracer
   129  
   130  	MaxLen              int
   131  	Interval            time.Duration
   132  	Parallelism         int
   133  	MaxFlushes          int
   134  	ShutdownGracePeriod time.Duration
   135  
   136  	contentsMu sync.Mutex
   137  	contents   *collections.RingBuffer
   138  
   139  	Handler Action
   140  	Errors  chan error
   141  
   142  	intervalWorker    *async.Interval
   143  	flushes           chan Flush
   144  	flushWorkersReady chan *async.Worker
   145  	flushWorkers      []*async.Worker
   146  }
   147  
    148  // Background returns the buffer's context if set, otherwise a background context.
   149  func (ab *Buffer) Background() context.Context {
   150  	if ab.Context != nil {
   151  		return ab.Context
   152  	}
   153  	return context.Background()
   154  }
   155  
    156  // Start starts the auto-flush buffer.
   157  /*
   158  This call blocks. To call it asynchronously:
   159  
   160  	go afb.Start()
   161  	<-afb.NotifyStarted()
   162  */
   163  func (ab *Buffer) Start() error {
   164  	if !ab.Latch.CanStart() {
   165  		return ex.New(async.ErrCannotStart)
   166  	}
   167  	ab.Latch.Starting()
   168  
   169  	ab.flushes = make(chan Flush, ab.MaxFlushes)
   170  	ab.flushWorkers = make([]*async.Worker, ab.Parallelism)
   171  	ab.flushWorkersReady = make(chan *async.Worker, ab.Parallelism)
   172  	ab.intervalWorker = async.NewInterval(ab.FlushAsync, ab.Interval, async.OptIntervalErrors(ab.Errors))
   173  
   174  	for x := 0; x < ab.Parallelism; x++ {
   175  		worker := async.NewWorker(ab.workerAction)
   176  		worker.Context = ab.Context
   177  		worker.Errors = ab.Errors
   178  		worker.Finalizer = ab.returnFlushWorker
   179  		go func() { _ = worker.Start() }()
   180  		<-worker.NotifyStarted()
   181  		ab.flushWorkers[x] = worker
   182  		ab.flushWorkersReady <- worker
   183  	}
   184  	go func() { _ = ab.intervalWorker.Start() }()
   185  	ab.Dispatch()
   186  	return nil
   187  }
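
// Illustrative usage (editor's sketch, not part of the original source): Start blocks on
// the dispatch loop, so callers run it on its own goroutine and wait for the started
// signal before adding items. Handler errors are surfaced on the Errors channel when one
// is configured via OptErrors; "handler" below is a hypothetical Action.
//
//	errs := make(chan error, 32)
//	buf := autoflush.New(handler, autoflush.OptErrors(errs))
//	go func() { _ = buf.Start() }()
//	<-buf.NotifyStarted()
//	go func() {
//		for err := range errs {
//			fmt.Println("autoflush:", err)
//		}
//	}()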
   188  
   189  // Dispatch is the main run loop.
   190  func (ab *Buffer) Dispatch() {
   191  	ab.Latch.Started()
   192  
   193  	var stopping <-chan struct{}
   194  	var flushWorker *async.Worker
   195  	var flush Flush
   196  	for {
   197  		stopping = ab.Latch.NotifyStopping()
   198  		select {
   199  		case <-stopping:
   200  			ab.Latch.Stopped()
   201  			return
   202  		default:
   203  		}
   204  		select {
   205  		case flush = <-ab.flushes:
   206  			select {
   207  			case flushWorker = <-ab.flushWorkersReady:
   208  				flushWorker.Work <- flush
   209  			case <-stopping:
   210  				ab.flushes <- flush
   211  				ab.Latch.Stopped()
   212  				return
   213  			}
   214  		case <-stopping:
   215  			ab.Latch.Stopped()
   216  			return
   217  		}
   218  	}
   219  }
   220  
   221  // Stop stops the buffer flusher.
   222  //
   223  // Any in flight flushes will be given ShutdownGracePeriod amount of time.
   224  //
   225  // Stop is _very_ complicated.
   226  func (ab *Buffer) Stop() error {
   227  	if !ab.Latch.CanStop() {
   228  		return ex.New(async.ErrCannotStop)
   229  	}
   230  	// stop the interval worker
    231  	_ = ab.intervalWorker.Stop()
   232  
    233  	ab.Latch.Stopping() // signal the running dispatch loop to stop
    234  	ab.Latch.WaitStopped()
   235  
   236  	timeoutContext, cancel := context.WithTimeout(ab.Background(), ab.ShutdownGracePeriod)
   237  	defer cancel()
   238  
   239  	ab.contentsMu.Lock()
   240  	defer ab.contentsMu.Unlock()
   241  	if ab.contents.Len() > 0 {
   242  		ab.flushes <- Flush{
   243  			Context:  timeoutContext,
   244  			Contents: ab.contents.Drain(),
   245  		}
   246  	}
   247  
   248  	if remainingFlushes := len(ab.flushes); remainingFlushes > 0 {
   249  		logger.MaybeDebugf(ab.Log, "%d flushes remaining", remainingFlushes)
   250  		var flushWorker *async.Worker
   251  		var flush Flush
   252  		for x := 0; x < remainingFlushes; x++ {
   253  			select {
   254  			case <-timeoutContext.Done():
   255  				logger.MaybeDebugf(ab.Log, "stop timed out")
   256  				return nil
   257  			case flush = <-ab.flushes:
   258  				select {
   259  				case <-timeoutContext.Done():
   260  					logger.MaybeDebugf(ab.Log, "stop timed out")
   261  					return nil
   262  				case flushWorker = <-ab.flushWorkersReady:
   263  					flushWorker.Work <- flush
   264  				}
   265  			}
   266  		}
   267  	}
   268  
   269  	workersStopped := make(chan struct{})
   270  	go func() {
   271  		defer close(workersStopped)
   272  		wg := sync.WaitGroup{}
   273  		wg.Add(len(ab.flushWorkers))
   274  		for index, worker := range ab.flushWorkers {
   275  			go func(i int, w *async.Worker) {
   276  				defer wg.Done()
   277  				logger.MaybeDebugf(ab.Log, "draining worker %d", i)
   278  				w.StopContext(timeoutContext)
   279  			}(index, worker)
   280  		}
   281  		wg.Wait()
   282  	}()
   283  
   284  	select {
   285  	case <-timeoutContext.Done():
   286  		logger.MaybeDebugf(ab.Log, "stop timed out")
   287  		return nil
   288  	case <-workersStopped:
   289  		return nil
   290  	}
   291  }
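
// Illustrative shutdown (editor's sketch, not part of the original source): Stop drains
// any buffered contents and in-flight flushes, bounded by ShutdownGracePeriod.
//
//	if err := buf.Stop(); err != nil {
//		// async.ErrCannotStop is returned if the buffer is not running.
//		fmt.Println("autoflush stop:", err)
//	}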
   292  
   293  // NotifyStarted implements graceful.Graceful.
   294  func (ab *Buffer) NotifyStarted() <-chan struct{} {
   295  	return ab.Latch.NotifyStarted()
   296  }
   297  
   298  // NotifyStopped implements graceful.Graceful.
   299  func (ab *Buffer) NotifyStopped() <-chan struct{} {
   300  	return ab.Latch.NotifyStopped()
   301  }
   302  
    303  // Add adds a new object to the buffer. If the add fills the buffer to MaxLen, the contents are drained
    304  // and enqueued as a flush; the flush handler runs on a worker goroutine, so Add blocks only if the flush queue is full.
   305  func (ab *Buffer) Add(ctx context.Context, obj interface{}) {
   306  	if ab.Tracer != nil {
   307  		finisher := ab.Tracer.StartAdd(ctx)
   308  		defer finisher.Finish(nil)
   309  	}
   310  	var bufferLength int
   311  	if ab.Stats != nil {
   312  		ab.maybeStatCount(ctx, MetricAdd, 1)
   313  		start := time.Now().UTC()
   314  		defer func() {
   315  			ab.maybeStatGauge(ctx, MetricBufferLength, float64(bufferLength))
   316  			ab.maybeStatElapsed(ctx, MetricAddElapsed, start)
   317  		}()
   318  	}
   319  
   320  	var flush []interface{}
   321  	ab.contentsMu.Lock()
   322  	bufferLength = ab.contents.Len()
   323  	ab.contents.Enqueue(obj)
   324  	if ab.contents.Len() >= ab.MaxLen {
   325  		flush = ab.contents.Drain()
   326  	}
   327  	ab.contentsMu.Unlock()
   328  	ab.unsafeFlushAsync(ctx, flush)
   329  }
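
// Illustrative usage (editor's sketch, not part of the original source): Add is safe for
// concurrent use; contentsMu guards the ring buffer, and a full buffer is drained and
// handed to the flush workers rather than processed inline.
//
//	for _, evt := range events { // events is a hypothetical slice of work items
//		buf.Add(ctx, evt)
//	}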
   330  
   331  // AddMany adds many objects to the buffer at once.
   332  func (ab *Buffer) AddMany(ctx context.Context, objs ...interface{}) {
   333  	if ab.Tracer != nil {
   334  		finisher := ab.Tracer.StartAddMany(ctx)
   335  		defer finisher.Finish(nil)
   336  	}
   337  	var bufferLength int
   338  	if ab.Stats != nil {
   339  		ab.maybeStatCount(ctx, MetricAddMany, 1)
   340  		ab.maybeStatCount(ctx, MetricAddManyItemCount, len(objs))
   341  		start := time.Now().UTC()
   342  		defer func() {
   343  			ab.maybeStatGauge(ctx, MetricBufferLength, float64(bufferLength))
   344  			ab.maybeStatElapsed(ctx, MetricAddManyElapsed, start)
   345  		}()
   346  	}
   347  
   348  	var flushes [][]interface{}
   349  	ab.contentsMu.Lock()
   350  	bufferLength = ab.contents.Len()
   351  	for _, obj := range objs {
   352  		ab.contents.Enqueue(obj)
   353  		if ab.contents.Len() >= ab.MaxLen {
   354  			flushes = append(flushes, ab.contents.Drain())
   355  		}
   356  	}
   357  	ab.contentsMu.Unlock()
   358  	for _, flush := range flushes {
   359  		ab.unsafeFlushAsync(ctx, flush)
   360  	}
   361  }
   362  
    363  // FlushAsync clears the buffer; if a handler is provided, it is passed the contents of the buffer.
   364  // This call is asynchronous, in that it will call the flush handler on its own goroutine.
   365  func (ab *Buffer) FlushAsync(ctx context.Context) error {
   366  	ab.contentsMu.Lock()
   367  	contents := ab.contents.Drain()
   368  	ab.contentsMu.Unlock()
   369  	ab.unsafeFlushAsync(ctx, contents)
   370  	return nil
   371  }
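
// Illustrative usage (editor's sketch, not part of the original source): FlushAsync is the
// function the interval worker invokes on each tick (see Start); it can also be called
// directly to force a flush of whatever is currently buffered.
//
//	_ = buf.FlushAsync(context.Background())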
   372  
    373  // workerAction is called by the flush workers.
   374  func (ab *Buffer) workerAction(ctx context.Context, obj interface{}) (err error) {
   375  	typed, ok := obj.(Flush)
   376  	if !ok {
   377  		return fmt.Errorf("autoflush buffer; worker action argument not autoflush.Flush")
   378  	}
   379  	if ab.Tracer != nil {
   380  		var finisher TraceFinisher
   381  		ctx, finisher = ab.Tracer.StartFlush(ctx)
   382  		defer finisher.Finish(err)
   383  	}
   384  	if ab.Stats != nil {
   385  		ab.maybeStatCount(ctx, MetricFlushHandler, 1)
   386  		start := time.Now().UTC()
   387  		defer func() { ab.maybeStatElapsed(ctx, MetricFlushHandlerElapsed, start) }()
   388  	}
   389  	err = ab.Handler(typed.Context, typed.Contents)
   390  	return
   391  }
   392  
   393  // returnFlushWorker returns a given worker to the worker queue.
   394  func (ab *Buffer) returnFlushWorker(ctx context.Context, worker *async.Worker) error {
   395  	ab.flushWorkersReady <- worker
   396  	return nil
   397  }
   398  
    399  // unsafeFlushAsync enqueues the given contents as a flush to be handled by the flush workers.
    400  // This call is asynchronous, in that the flush handler runs on a worker goroutine; it blocks only if the flush queue is full.
   401  func (ab *Buffer) unsafeFlushAsync(ctx context.Context, contents []interface{}) {
   402  	if len(contents) == 0 {
   403  		return
   404  	}
   405  	if ab.Tracer != nil {
   406  		finisher := ab.Tracer.StartQueueFlush(ctx)
   407  		defer finisher.Finish(nil)
   408  	}
   409  	if ab.Stats != nil {
   410  		ab.maybeStatCount(ctx, MetricFlush, 1)
   411  		ab.maybeStatGauge(ctx, MetricFlushQueueLength, float64(len(ab.flushes)))
   412  		ab.maybeStatCount(ctx, MetricFlushItemCount, len(contents))
   413  		start := time.Now().UTC()
   414  		defer func() {
   415  			ab.maybeStatElapsed(ctx, MetricFlushEnqueueElapsed, start)
   416  		}()
   417  	}
   418  
   419  	logger.MaybeDebugf(ab.Log, "autoflush buffer; queue flush, queue length: %d", len(ab.flushes))
   420  	ab.flushes <- Flush{
   421  		Context:  ctx,
   422  		Contents: contents,
   423  	}
   424  }
   425  
   426  func (ab *Buffer) maybeStatCount(ctx context.Context, metricName string, count int) {
   427  	if ab.Stats != nil {
   428  		_ = ab.Stats.Count(metricName, int64(count), ab.statTags(ctx)...)
   429  	}
   430  }
   431  
   432  func (ab *Buffer) maybeStatGauge(ctx context.Context, metricName string, gauge float64) {
   433  	if ab.Stats != nil {
   434  		_ = ab.Stats.Gauge(metricName, gauge, ab.statTags(ctx)...)
   435  	}
   436  }
   437  
   438  func (ab *Buffer) maybeStatElapsed(ctx context.Context, metricName string, start time.Time) {
   439  	if ab.Stats != nil {
   440  		elapsed := time.Now().UTC().Sub(start.UTC())
   441  		_ = ab.Stats.Gauge(metricName, timeutil.Milliseconds(elapsed), ab.statTags(ctx)...)
   442  		_ = ab.Stats.TimeInMilliseconds(metricName, elapsed, ab.statTags(ctx)...)
   443  		_ = ab.Stats.Distribution(metricName, timeutil.Milliseconds(elapsed), ab.statTags(ctx)...)
   444  	}
   445  }
   446  
   447  func (ab *Buffer) statTags(ctx context.Context) (tags []string) {
   448  	if ab.Log != nil {
   449  		ctx = ab.Log.ApplyContext(ctx)
   450  	}
   451  	labels := logger.GetLabels(ctx)
   452  	for key, value := range labels {
   453  		tags = append(tags, stats.Tag(key, value))
   454  	}
   455  	return
   456  }
   457  
    458  // Flush is an in-flight flush attempt.
   459  type Flush struct {
   460  	Context  context.Context
   461  	Contents []interface{}
   462  }