github.com/m3db/m3@v1.5.0/src/msg/producer/buffer/buffer.go (about) 1 // Copyright (c) 2018 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package buffer 22 23 import ( 24 "container/list" 25 "errors" 26 "strconv" 27 "sync" 28 "time" 29 30 "github.com/m3db/m3/src/msg/producer" 31 "github.com/m3db/m3/src/x/instrument" 32 "github.com/m3db/m3/src/x/retry" 33 34 "github.com/uber-go/tally" 35 "go.uber.org/atomic" 36 ) 37 38 var ( 39 emptyStruct = struct{}{} 40 41 // ErrBufferFull is returned when the buffer is full. 42 ErrBufferFull = errors.New("buffer full") 43 44 errBufferClosed = errors.New("buffer closed") 45 errMessageTooLarge = errors.New("message size larger than allowed") 46 errCleanupNoProgress = errors.New("buffer cleanup no progress") 47 ) 48 49 type bufferMetrics struct { 50 messageDropped counterPerNumRefBuckets 51 byteDropped counterPerNumRefBuckets 52 messageTooLarge tally.Counter 53 cleanupNoProgress tally.Counter 54 dropOldestSync tally.Counter 55 dropOldestAsync tally.Counter 56 messageBuffered tally.Gauge 57 byteBuffered tally.Gauge 58 bufferScanBatch tally.Timer 59 bytesAdded tally.Counter 60 bytesRemoved tally.Counter 61 } 62 63 type counterPerNumRefBuckets struct { 64 buckets []counterPerNumRefBucket 65 unknownBucket tally.Counter 66 } 67 68 type counterPerNumRefBucket struct { 69 // numRef is the counter for the number of references at time of count 70 // of the ref counted message. 71 numRef int 72 // counter is the actual counter for this bucket. 73 counter tally.Counter 74 } 75 76 func newCounterPerNumRefBuckets( 77 scope tally.Scope, 78 name string, 79 n int, 80 ) counterPerNumRefBuckets { 81 buckets := make([]counterPerNumRefBucket, 0, n) 82 for i := 0; i < n; i++ { 83 buckets = append(buckets, counterPerNumRefBucket{ 84 numRef: i, 85 counter: scope.Tagged(map[string]string{ 86 "num-replicas": strconv.Itoa(i), 87 }).Counter(name), 88 }) 89 } 90 return counterPerNumRefBuckets{ 91 buckets: buckets, 92 unknownBucket: scope.Tagged(map[string]string{ 93 "num-replicas": "unknown", 94 }).Counter(name), 95 } 96 } 97 98 func (c counterPerNumRefBuckets) Inc(numRef int32, delta int64) { 99 for _, b := range c.buckets { 100 if b.numRef == int(numRef) { 101 b.counter.Inc(delta) 102 return 103 } 104 } 105 c.unknownBucket.Inc(delta) 106 } 107 108 func newBufferMetrics( 109 scope tally.Scope, 110 opts instrument.TimerOptions, 111 ) bufferMetrics { 112 return bufferMetrics{ 113 messageDropped: newCounterPerNumRefBuckets(scope, "buffer-message-dropped", 10), 114 byteDropped: newCounterPerNumRefBuckets(scope, "buffer-byte-dropped", 10), 115 messageTooLarge: scope.Counter("message-too-large"), 116 cleanupNoProgress: scope.Counter("cleanup-no-progress"), 117 dropOldestSync: scope.Counter("drop-oldest-sync"), 118 dropOldestAsync: scope.Counter("drop-oldest-async"), 119 messageBuffered: scope.Gauge("message-buffered"), 120 byteBuffered: scope.Gauge("byte-buffered"), 121 bufferScanBatch: instrument.NewTimer(scope, "buffer-scan-batch", opts), 122 bytesAdded: scope.Counter("buffer-bytes-added"), 123 bytesRemoved: scope.Counter("buffer-bytes-removed"), 124 } 125 } 126 127 // nolint: maligned 128 type buffer struct { 129 sync.RWMutex 130 131 listLock sync.RWMutex 132 bufferList *list.List 133 opts Options 134 maxBufferSize uint64 135 maxSpilloverSize uint64 136 maxMessageSize int 137 onFinalizeFn producer.OnFinalizeFn 138 retrier retry.Retrier 139 m bufferMetrics 140 141 size *atomic.Uint64 142 isClosed bool 143 dropOldestCh chan struct{} 144 doneCh chan struct{} 145 forceDrop bool 146 wg sync.WaitGroup 147 } 148 149 // NewBuffer returns a new buffer. 150 func NewBuffer(opts Options) (producer.Buffer, error) { 151 if opts == nil { 152 opts = NewOptions() 153 } 154 if err := opts.Validate(); err != nil { 155 return nil, err 156 } 157 maxBufferSize := uint64(opts.MaxBufferSize()) 158 allowedSpillover := float64(maxBufferSize) * opts.AllowedSpilloverRatio() 159 b := &buffer{ 160 bufferList: list.New(), 161 maxBufferSize: maxBufferSize, 162 maxSpilloverSize: uint64(allowedSpillover) + maxBufferSize, 163 maxMessageSize: opts.MaxMessageSize(), 164 opts: opts, 165 retrier: retry.NewRetrier(opts.CleanupRetryOptions()), 166 m: newBufferMetrics( 167 opts.InstrumentOptions().MetricsScope(), 168 opts.InstrumentOptions().TimerOptions(), 169 ), 170 size: atomic.NewUint64(0), 171 isClosed: false, 172 dropOldestCh: make(chan struct{}, 1), 173 doneCh: make(chan struct{}), 174 } 175 b.onFinalizeFn = b.subSize 176 return b, nil 177 } 178 179 func (b *buffer) Add(m producer.Message) (*producer.RefCountedMessage, error) { 180 s := m.Size() 181 b.m.bytesAdded.Inc(int64(s)) 182 if s > b.maxMessageSize { 183 b.m.messageTooLarge.Inc(1) 184 return nil, errMessageTooLarge 185 } 186 b.RLock() 187 if b.isClosed { 188 b.RUnlock() 189 return nil, errBufferClosed 190 } 191 messageSize := uint64(s) 192 newBufferSize := b.size.Add(messageSize) 193 if newBufferSize > b.maxBufferSize { 194 if err := b.produceOnFull(newBufferSize, messageSize); err != nil { 195 b.RUnlock() 196 return nil, err 197 } 198 } 199 rm := producer.NewRefCountedMessage(m, b.onFinalizeFn) 200 b.listLock.Lock() 201 b.bufferList.PushBack(rm) 202 b.listLock.Unlock() 203 b.RUnlock() 204 return rm, nil 205 } 206 207 func (b *buffer) produceOnFull(newBufferSize uint64, messageSize uint64) error { 208 switch b.opts.OnFullStrategy() { 209 case ReturnError: 210 b.size.Sub(messageSize) 211 return ErrBufferFull 212 case DropOldest: 213 if newBufferSize >= b.maxSpilloverSize { 214 // The size after the write reached max allowed spill over size. 215 // We have to clean up the buffer synchronizely to make room for 216 // the new write. 217 b.dropOldestUntilTarget(b.maxBufferSize) 218 b.m.dropOldestSync.Inc(1) 219 return nil 220 } 221 // The new message is within the allowed spill over range, clean up 222 // the buffer asynchronizely. 223 select { 224 case b.dropOldestCh <- emptyStruct: 225 default: 226 } 227 b.m.dropOldestAsync.Inc(1) 228 } 229 return nil 230 } 231 232 func (b *buffer) Init() { 233 b.wg.Add(1) 234 go func() { 235 b.cleanupUntilClose() 236 b.wg.Done() 237 }() 238 239 if b.opts.OnFullStrategy() != DropOldest { 240 return 241 } 242 b.wg.Add(1) 243 go func() { 244 b.dropOldestUntilClose() 245 b.wg.Done() 246 }() 247 } 248 249 func (b *buffer) cleanupUntilClose() { 250 ticker := time.NewTicker( 251 b.opts.CleanupRetryOptions().InitialBackoff(), 252 ) 253 defer ticker.Stop() 254 255 continueFn := func(int) bool { 256 select { 257 case <-b.doneCh: 258 return false 259 default: 260 return true 261 } 262 } 263 for { 264 select { 265 case <-ticker.C: 266 b.retrier.AttemptWhile( 267 continueFn, 268 b.cleanup, 269 ) 270 case <-b.doneCh: 271 return 272 } 273 } 274 } 275 276 func (b *buffer) cleanup() error { 277 b.listLock.RLock() 278 e := b.bufferList.Front() 279 b.listLock.RUnlock() 280 b.RLock() 281 forceDrop := b.forceDrop 282 b.RUnlock() 283 var ( 284 batchSize = b.opts.ScanBatchSize() 285 totalRemoved int 286 batchRemoved int 287 ) 288 for e != nil { 289 beforeBatch := time.Now() 290 // NB: There is a chance the start element could be removed by another 291 // thread since the lock will be released between scan batch. 292 // For example when the there is a slow/dead consumer that is not 293 // consuming anything and caused buffer to be full, a new write could 294 // trigger dropOldest and remove elements from the front of the list. 295 // In this case, the batch starting from the removed element will do 296 // nothing and will finish the tick, which is good as this avoids the 297 // tick repeatedly scanning and doing nothing because nothing is being 298 // consumed. 299 b.listLock.Lock() 300 e, batchRemoved = b.cleanupBatchWithListLock(e, batchSize, forceDrop) 301 b.listLock.Unlock() 302 b.m.bufferScanBatch.Record(time.Since(beforeBatch)) 303 totalRemoved += batchRemoved 304 } 305 b.m.messageBuffered.Update(float64(b.bufferLen())) 306 b.m.byteBuffered.Update(float64(b.size.Load())) 307 if totalRemoved == 0 { 308 b.m.cleanupNoProgress.Inc(1) 309 return errCleanupNoProgress 310 } 311 return nil 312 } 313 314 func (b *buffer) cleanupBatchWithListLock( 315 start *list.Element, 316 batchSize int, 317 forceDrop bool, 318 ) (*list.Element, int) { 319 var ( 320 iterated int 321 next *list.Element 322 removed int 323 ) 324 for e := start; e != nil; e = next { 325 iterated++ 326 if iterated > batchSize { 327 break 328 } 329 next = e.Next() 330 rm := e.Value.(*producer.RefCountedMessage) 331 if rm.IsDroppedOrConsumed() { 332 b.bufferList.Remove(e) 333 removed++ 334 continue 335 } 336 if !forceDrop { 337 continue 338 } 339 // There is a chance that the message is consumed right before 340 // the drop call which will lead drop to return false. 341 if rm.Drop() { 342 b.bufferList.Remove(e) 343 removed++ 344 345 numRef := rm.NumRef() 346 b.m.messageDropped.Inc(numRef, 1) 347 b.m.byteDropped.Inc(numRef, int64(rm.Size())) 348 } 349 } 350 return next, removed 351 } 352 353 func (b *buffer) dropOldestUntilClose() { 354 ticker := time.NewTicker(b.opts.DropOldestInterval()) 355 defer ticker.Stop() 356 357 for { 358 select { 359 case <-ticker.C: 360 select { 361 case <-b.dropOldestCh: 362 default: 363 continue 364 } 365 b.dropOldestUntilTarget(b.maxBufferSize) 366 case <-b.doneCh: 367 return 368 } 369 } 370 } 371 372 func (b *buffer) dropOldestUntilTarget(targetSize uint64) { 373 shouldContinue := true 374 for shouldContinue { 375 b.listLock.Lock() 376 shouldContinue = b.dropOldestBatchUntilTargetWithListLock(targetSize, b.opts.ScanBatchSize()) 377 b.listLock.Unlock() 378 } 379 } 380 381 func (b *buffer) dropOldestBatchUntilTargetWithListLock( 382 targetSize uint64, 383 batchSize int, 384 ) bool { 385 var ( 386 iterated int 387 e = b.bufferList.Front() 388 ) 389 for e != nil && b.size.Load() > targetSize { 390 iterated++ 391 if iterated > batchSize { 392 return true 393 } 394 next := e.Next() 395 rm := e.Value.(*producer.RefCountedMessage) 396 b.bufferList.Remove(e) 397 e = next 398 if rm.IsDroppedOrConsumed() { 399 continue 400 } 401 // There is a chance that the message is consumed right before 402 // the drop call which will lead drop to return false. 403 if rm.Drop() { 404 numRef := rm.NumRef() 405 b.m.messageDropped.Inc(numRef, 1) 406 b.m.byteDropped.Inc(numRef, int64(rm.Size())) 407 } 408 } 409 return false 410 } 411 412 func (b *buffer) Close(ct producer.CloseType) { 413 // Stop taking writes right away. 414 b.Lock() 415 if b.isClosed { 416 b.Unlock() 417 return 418 } 419 b.isClosed = true 420 if ct == producer.DropEverything { 421 b.forceDrop = true 422 } 423 b.Unlock() 424 b.waitUntilAllDataConsumed() 425 close(b.doneCh) 426 close(b.dropOldestCh) 427 b.wg.Wait() 428 } 429 430 func (b *buffer) waitUntilAllDataConsumed() { 431 if b.bufferLen() == 0 { 432 return 433 } 434 ticker := time.NewTicker(b.opts.CloseCheckInterval()) 435 defer ticker.Stop() 436 437 for range ticker.C { 438 if b.bufferLen() == 0 { 439 return 440 } 441 } 442 } 443 444 func (b *buffer) bufferLen() int { 445 b.listLock.RLock() 446 l := b.bufferList.Len() 447 b.listLock.RUnlock() 448 return l 449 } 450 451 func (b *buffer) subSize(rm *producer.RefCountedMessage) { 452 b.m.bytesRemoved.Inc(int64(rm.Size())) 453 b.size.Sub(rm.Size()) 454 }