github.com/blend/go-sdk@v1.20220411.3/autoflush/buffer.go

/*

Copyright (c) 2022 - Present. Blend Labs, Inc. All rights reserved
Use of this source code is governed by a MIT license that can be found in the LICENSE file.

*/

package autoflush

import (
	"context"
	"fmt"
	"runtime"
	"sync"
	"time"

	"github.com/blend/go-sdk/async"
	"github.com/blend/go-sdk/collections"
	"github.com/blend/go-sdk/ex"
	"github.com/blend/go-sdk/logger"
	"github.com/blend/go-sdk/stats"
	"github.com/blend/go-sdk/timeutil"
)

// New creates a new buffer.
func New(handler Action, options ...Option) *Buffer {
	afb := Buffer{
		Latch:               async.NewLatch(),
		Handler:             handler,
		Parallelism:         runtime.NumCPU(),
		MaxFlushes:          DefaultMaxFlushes,
		MaxLen:              DefaultMaxLen,
		Interval:            DefaultFlushInterval,
		ShutdownGracePeriod: DefaultShutdownGracePeriod,
	}
	for _, option := range options {
		option(&afb)
	}
	afb.contents = collections.NewRingBufferWithCapacity(afb.MaxLen)
	return &afb
}

// Option is an option for auto-flush buffers.
type Option func(*Buffer)

// OptMaxFlushes sets the auto-flush buffer's maximum flush queue length.
func OptMaxFlushes(maxFlushes int) Option {
	return func(afb *Buffer) {
		afb.MaxFlushes = maxFlushes
	}
}

// OptParallelism sets the auto-flush buffer's flush worker count.
func OptParallelism(parallelism int) Option {
	return func(afb *Buffer) {
		afb.Parallelism = parallelism
	}
}

// OptMaxLen sets the auto-flush buffer's maximum length.
func OptMaxLen(maxLen int) Option {
	return func(afb *Buffer) {
		afb.MaxLen = maxLen
	}
}

// OptInterval sets the auto-flush buffer's interval.
func OptInterval(d time.Duration) Option {
	return func(afb *Buffer) {
		afb.Interval = d
	}
}

// OptContext sets the auto-flush buffer's context.
func OptContext(ctx context.Context) Option {
	return func(afb *Buffer) {
		afb.Context = ctx
	}
}

// OptErrors sets the auto-flush buffer's error return channel.
func OptErrors(errors chan error) Option {
	return func(afb *Buffer) {
		afb.Errors = errors
	}
}

// OptShutdownGracePeriod sets the auto-flush buffer's shutdown grace period.
func OptShutdownGracePeriod(shutdownGracePeriod time.Duration) Option {
	return func(afb *Buffer) {
		afb.ShutdownGracePeriod = shutdownGracePeriod
	}
}

// OptLog sets the Buffer logger.
func OptLog(log logger.Log) Option {
	return func(afb *Buffer) {
		afb.Log = log
	}
}

// OptStats sets the Buffer stats collector.
func OptStats(stats stats.Collector) Option {
	return func(afb *Buffer) {
		afb.Stats = stats
	}
}

// OptTracer sets the Buffer tracer.
func OptTracer(tracer Tracer) Option {
	return func(afb *Buffer) {
		afb.Tracer = tracer
	}
}

// Action is an action called by a buffer.
type Action func(context.Context, []interface{}) error
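
/*
Construction sketch (illustrative, not part of this package; "sendBatch" is a
hypothetical stand-in for whatever processes a drained batch): New takes the
flush handler plus any options, and any option left unset keeps the package
default applied in New above.

	afb := autoflush.New(
		func(ctx context.Context, contents []interface{}) error {
			return sendBatch(ctx, contents)
		},
		autoflush.OptMaxLen(256),
		autoflush.OptInterval(500*time.Millisecond),
		autoflush.OptParallelism(4),
	)
*/
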
// Buffer is a backing store that operates either on a fixed length flush or a fixed interval flush.
// A handler should be provided but without one the buffer will just clear.
// Adds that would cause fixed length flushes do not block on the flush handler.
type Buffer struct {
	Latch   *async.Latch
	Context context.Context

	Log    logger.Log
	Stats  stats.Collector
	Tracer Tracer

	MaxLen              int
	Interval            time.Duration
	Parallelism         int
	MaxFlushes          int
	ShutdownGracePeriod time.Duration

	contentsMu sync.Mutex
	contents   *collections.RingBuffer

	Handler Action
	Errors  chan error

	intervalWorker    *async.Interval
	flushes           chan Flush
	flushWorkersReady chan *async.Worker
	flushWorkers      []*async.Worker
}

// Background returns a background context.
func (ab *Buffer) Background() context.Context {
	if ab.Context != nil {
		return ab.Context
	}
	return context.Background()
}

// Start starts the auto-flush buffer.
/*
This call blocks. To call it asynchronously:

	go afb.Start()
	<-afb.NotifyStarted()
*/
func (ab *Buffer) Start() error {
	if !ab.Latch.CanStart() {
		return ex.New(async.ErrCannotStart)
	}
	ab.Latch.Starting()

	ab.flushes = make(chan Flush, ab.MaxFlushes)
	ab.flushWorkers = make([]*async.Worker, ab.Parallelism)
	ab.flushWorkersReady = make(chan *async.Worker, ab.Parallelism)
	ab.intervalWorker = async.NewInterval(ab.FlushAsync, ab.Interval, async.OptIntervalErrors(ab.Errors))

	for x := 0; x < ab.Parallelism; x++ {
		worker := async.NewWorker(ab.workerAction)
		worker.Context = ab.Context
		worker.Errors = ab.Errors
		worker.Finalizer = ab.returnFlushWorker
		go func() { _ = worker.Start() }()
		<-worker.NotifyStarted()
		ab.flushWorkers[x] = worker
		ab.flushWorkersReady <- worker
	}
	go func() { _ = ab.intervalWorker.Start() }()
	ab.Dispatch()
	return nil
}

// Dispatch is the main run loop.
func (ab *Buffer) Dispatch() {
	ab.Latch.Started()

	var stopping <-chan struct{}
	var flushWorker *async.Worker
	var flush Flush
	for {
		stopping = ab.Latch.NotifyStopping()
		select {
		case <-stopping:
			ab.Latch.Stopped()
			return
		default:
		}
		select {
		case flush = <-ab.flushes:
			select {
			case flushWorker = <-ab.flushWorkersReady:
				flushWorker.Work <- flush
			case <-stopping:
				ab.flushes <- flush
				ab.Latch.Stopped()
				return
			}
		case <-stopping:
			ab.Latch.Stopped()
			return
		}
	}
}
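
/*
Lifecycle sketch (assumes an "afb" constructed with New as in the sketch above):
Start blocks while Dispatch runs, so callers typically start it on its own
goroutine, wait on NotifyStarted, and call Stop during shutdown so in-flight
flushes get ShutdownGracePeriod to complete.

	go func() { _ = afb.Start() }()
	<-afb.NotifyStarted()
	// ... Add / AddMany while the buffer is running ...
	_ = afb.Stop()
*/
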
// Stop stops the buffer flusher.
//
// Any in flight flushes will be given ShutdownGracePeriod amount of time.
//
// Stop is _very_ complicated.
func (ab *Buffer) Stop() error {
	if !ab.Latch.CanStop() {
		return ex.New(async.ErrCannotStop)
	}
	// stop the interval worker
	ab.intervalWorker.WaitStopped()

	// stop the running dispatch loop
	ab.Latch.WaitStopped()

	timeoutContext, cancel := context.WithTimeout(ab.Background(), ab.ShutdownGracePeriod)
	defer cancel()

	ab.contentsMu.Lock()
	defer ab.contentsMu.Unlock()
	if ab.contents.Len() > 0 {
		ab.flushes <- Flush{
			Context:  timeoutContext,
			Contents: ab.contents.Drain(),
		}
	}

	if remainingFlushes := len(ab.flushes); remainingFlushes > 0 {
		logger.MaybeDebugf(ab.Log, "%d flushes remaining", remainingFlushes)
		var flushWorker *async.Worker
		var flush Flush
		for x := 0; x < remainingFlushes; x++ {
			select {
			case <-timeoutContext.Done():
				logger.MaybeDebugf(ab.Log, "stop timed out")
				return nil
			case flush = <-ab.flushes:
				select {
				case <-timeoutContext.Done():
					logger.MaybeDebugf(ab.Log, "stop timed out")
					return nil
				case flushWorker = <-ab.flushWorkersReady:
					flushWorker.Work <- flush
				}
			}
		}
	}

	workersStopped := make(chan struct{})
	go func() {
		defer close(workersStopped)
		wg := sync.WaitGroup{}
		wg.Add(len(ab.flushWorkers))
		for index, worker := range ab.flushWorkers {
			go func(i int, w *async.Worker) {
				defer wg.Done()
				logger.MaybeDebugf(ab.Log, "draining worker %d", i)
				w.StopContext(timeoutContext)
			}(index, worker)
		}
		wg.Wait()
	}()

	select {
	case <-timeoutContext.Done():
		logger.MaybeDebugf(ab.Log, "stop timed out")
		return nil
	case <-workersStopped:
		return nil
	}
}

// NotifyStarted implements graceful.Graceful.
func (ab *Buffer) NotifyStarted() <-chan struct{} {
	return ab.Latch.NotifyStarted()
}

// NotifyStopped implements graceful.Graceful.
func (ab *Buffer) NotifyStopped() <-chan struct{} {
	return ab.Latch.NotifyStopped()
}

// Add adds a new object to the buffer, blocking if it triggers a flush.
// If the buffer is full, it will call the flush handler on a separate goroutine.
func (ab *Buffer) Add(ctx context.Context, obj interface{}) {
	if ab.Tracer != nil {
		finisher := ab.Tracer.StartAdd(ctx)
		defer finisher.Finish(nil)
	}
	var bufferLength int
	if ab.Stats != nil {
		ab.maybeStatCount(ctx, MetricAdd, 1)
		start := time.Now().UTC()
		defer func() {
			ab.maybeStatGauge(ctx, MetricBufferLength, float64(bufferLength))
			ab.maybeStatElapsed(ctx, MetricAddElapsed, start)
		}()
	}

	var flush []interface{}
	ab.contentsMu.Lock()
	bufferLength = ab.contents.Len()
	ab.contents.Enqueue(obj)
	if ab.contents.Len() >= ab.MaxLen {
		flush = ab.contents.Drain()
	}
	ab.contentsMu.Unlock()
	ab.unsafeFlushAsync(ctx, flush)
}
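
/*
Feeding sketch (illustrative; "afb", "ctx", and the record values are assumed
from the caller): Add enqueues one item and, once the buffer reaches MaxLen,
hands the drained contents to a flush worker; AddMany (below) does the same for
a batch, enqueuing one flush per MaxLen boundary crossed.

	afb.Add(ctx, record)
	afb.AddMany(ctx, recordA, recordB, recordC)
*/
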
// AddMany adds many objects to the buffer at once.
func (ab *Buffer) AddMany(ctx context.Context, objs ...interface{}) {
	if ab.Tracer != nil {
		finisher := ab.Tracer.StartAddMany(ctx)
		defer finisher.Finish(nil)
	}
	var bufferLength int
	if ab.Stats != nil {
		ab.maybeStatCount(ctx, MetricAddMany, 1)
		ab.maybeStatCount(ctx, MetricAddManyItemCount, len(objs))
		start := time.Now().UTC()
		defer func() {
			ab.maybeStatGauge(ctx, MetricBufferLength, float64(bufferLength))
			ab.maybeStatElapsed(ctx, MetricAddManyElapsed, start)
		}()
	}

	var flushes [][]interface{}
	ab.contentsMu.Lock()
	bufferLength = ab.contents.Len()
	for _, obj := range objs {
		ab.contents.Enqueue(obj)
		if ab.contents.Len() >= ab.MaxLen {
			flushes = append(flushes, ab.contents.Drain())
		}
	}
	ab.contentsMu.Unlock()
	for _, flush := range flushes {
		ab.unsafeFlushAsync(ctx, flush)
	}
}

// FlushAsync clears the buffer; if a handler is provided, it is passed the contents of the buffer.
// This call is asynchronous, in that it will call the flush handler on its own goroutine.
func (ab *Buffer) FlushAsync(ctx context.Context) error {
	ab.contentsMu.Lock()
	contents := ab.contents.Drain()
	ab.contentsMu.Unlock()
	ab.unsafeFlushAsync(ctx, contents)
	return nil
}

// workerAction is called by the workers.
func (ab *Buffer) workerAction(ctx context.Context, obj interface{}) (err error) {
	typed, ok := obj.(Flush)
	if !ok {
		return fmt.Errorf("autoflush buffer; worker action argument not autoflush.Flush")
	}
	if ab.Tracer != nil {
		var finisher TraceFinisher
		ctx, finisher = ab.Tracer.StartFlush(ctx)
		// wrap in a closure so the finisher sees the handler error assigned below,
		// not the nil value `err` holds when the defer statement is evaluated.
		defer func() { finisher.Finish(err) }()
	}
	if ab.Stats != nil {
		ab.maybeStatCount(ctx, MetricFlushHandler, 1)
		start := time.Now().UTC()
		defer func() { ab.maybeStatElapsed(ctx, MetricFlushHandlerElapsed, start) }()
	}
	err = ab.Handler(typed.Context, typed.Contents)
	return
}

// returnFlushWorker returns a given worker to the worker queue.
func (ab *Buffer) returnFlushWorker(ctx context.Context, worker *async.Worker) error {
	ab.flushWorkersReady <- worker
	return nil
}

// unsafeFlushAsync enqueues the given contents to be flushed by a flush worker.
// It does not lock the contents buffer itself; callers drain the contents while holding the lock and pass the result in.
// This call is asynchronous, in that it will call the flush handler on its own goroutine.
func (ab *Buffer) unsafeFlushAsync(ctx context.Context, contents []interface{}) {
	if len(contents) == 0 {
		return
	}
	if ab.Tracer != nil {
		finisher := ab.Tracer.StartQueueFlush(ctx)
		defer finisher.Finish(nil)
	}
	if ab.Stats != nil {
		ab.maybeStatCount(ctx, MetricFlush, 1)
		ab.maybeStatGauge(ctx, MetricFlushQueueLength, float64(len(ab.flushes)))
		ab.maybeStatCount(ctx, MetricFlushItemCount, len(contents))
		start := time.Now().UTC()
		defer func() {
			ab.maybeStatElapsed(ctx, MetricFlushEnqueueElapsed, start)
		}()
	}

	logger.MaybeDebugf(ab.Log, "autoflush buffer; queue flush, queue length: %d", len(ab.flushes))
	ab.flushes <- Flush{
		Context:  ctx,
		Contents: contents,
	}
}
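
/*
Error-handling sketch (illustrative; "handler" is assumed from the caller):
errors returned by the flush handler are not returned from Add or AddMany; they
are forwarded to the error return channel set with OptErrors (see Start, which
wires it to the flush workers and the interval worker), so the caller should
drain that channel.

	flushErrors := make(chan error, 128)
	afb := autoflush.New(handler, autoflush.OptErrors(flushErrors))
	go func() {
		for err := range flushErrors {
			log.Printf("autoflush: flush failed: %v", err)
		}
	}()
*/
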
func (ab *Buffer) maybeStatCount(ctx context.Context, metricName string, count int) {
	if ab.Stats != nil {
		_ = ab.Stats.Count(metricName, int64(count), ab.statTags(ctx)...)
	}
}

func (ab *Buffer) maybeStatGauge(ctx context.Context, metricName string, gauge float64) {
	if ab.Stats != nil {
		_ = ab.Stats.Gauge(metricName, gauge, ab.statTags(ctx)...)
	}
}

func (ab *Buffer) maybeStatElapsed(ctx context.Context, metricName string, start time.Time) {
	if ab.Stats != nil {
		elapsed := time.Now().UTC().Sub(start.UTC())
		_ = ab.Stats.Gauge(metricName, timeutil.Milliseconds(elapsed), ab.statTags(ctx)...)
		_ = ab.Stats.TimeInMilliseconds(metricName, elapsed, ab.statTags(ctx)...)
		_ = ab.Stats.Distribution(metricName, timeutil.Milliseconds(elapsed), ab.statTags(ctx)...)
	}
}

func (ab *Buffer) statTags(ctx context.Context) (tags []string) {
	if ab.Log != nil {
		ctx = ab.Log.ApplyContext(ctx)
	}
	labels := logger.GetLabels(ctx)
	for key, value := range labels {
		tags = append(tags, stats.Tag(key, value))
	}
	return
}

// Flush is an inflight flush attempt.
type Flush struct {
	Context  context.Context
	Contents []interface{}
}
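
/*
Handler sketch (illustrative): during Stop, in-flight flushes are handed a
context bounded by ShutdownGracePeriod via Flush.Context, so a handler that
does slow or networked work should honor cancellation of the context it is
passed.

	func flushHandler(ctx context.Context, contents []interface{}) error {
		for _, item := range contents {
			select {
			case <-ctx.Done():
				return ctx.Err()
			default:
			}
			// process item ...
			_ = item
		}
		return nil
	}
*/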