go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/logdog/client/butler/bundler/bundler.go (about) 1 // Copyright 2015 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package bundler 16 17 import ( 18 "container/heap" 19 "context" 20 "fmt" 21 "sync" 22 "time" 23 24 "google.golang.org/protobuf/types/known/timestamppb" 25 26 "go.chromium.org/luci/common/clock" 27 "go.chromium.org/luci/logdog/api/logpb" 28 ) 29 30 // Config is the Bundler configuration. 31 type Config struct { 32 // Clock is the clock instance that will be used for Bundler and stream 33 // timing. 34 Clock clock.Clock 35 36 // MaxBufferedBytes is the maximum number of bytes to buffer in memory per 37 // stream. 38 MaxBufferedBytes int64 39 40 // MaxBundleSize is the maximum bundle size in bytes that may be generated. 41 // 42 // If this value is zero, no size constraint will be applied to generated 43 // bundles. 44 MaxBundleSize int 45 46 // MaxBufferDelay is the maximum amount of time we're willing to buffer 47 // bundled data. Other factors can cause the bundle to be sent before this, 48 // but it is an upper bound. 49 MaxBufferDelay time.Duration 50 } 51 52 type bundlerStream interface { 53 isDrained() bool 54 name() string 55 expireTime() (time.Time, bool) 56 nextBundleEntry(*builder, bool) bool 57 streamDesc() *logpb.LogStreamDescriptor 58 } 59 60 // Bundler is the main Bundler instance. It exposes goroutine-safe endpoints for 61 // stream registration and bundle consumption. 62 type Bundler struct { 63 c *Config 64 65 // finishedC is closed when makeBundles goroutine has terminated. 66 finishedC chan struct{} 67 bundleC chan *logpb.ButlerLogBundle 68 69 // streamsLock is a lock around the `streams` map and its contents. You must 70 // also hold this lock in order to push into streamsNotify. 71 streamsLock sync.Mutex 72 // streamsNotify has a buffer size of 1 and acts as a select-able semaphore. 73 streamsNotify chan struct{} 74 // streams is the set of currently-registered Streams. 75 streams map[string]bundlerStream 76 // flushing is true if we're blocking on CloseAndFlush(). 77 flushing bool 78 79 // prefixCounter is a global counter for Prefix-wide streams. 80 prefixCounter counter 81 } 82 83 // New instantiates a new Bundler instance. 84 func New(c Config) *Bundler { 85 b := Bundler{ 86 c: &c, 87 finishedC: make(chan struct{}), 88 bundleC: make(chan *logpb.ButlerLogBundle), 89 streams: map[string]bundlerStream{}, 90 streamsNotify: make(chan struct{}, 1), 91 } 92 93 go b.makeBundles() 94 return &b 95 } 96 97 // Register adds a new stream to the Bundler, returning a reference to the 98 // registered stream. 99 // 100 // The Bundler takes ownership of the supplied Properties, and may modify them 101 // as needed. 102 func (b *Bundler) Register(d *logpb.LogStreamDescriptor) (Stream, error) { 103 // Our Properties must validate. 104 if err := d.Validate(false); err != nil { 105 return nil, err 106 } 107 108 // Enforce that the log stream descriptor's Prefix is empty. 109 d.Prefix = "" 110 111 // Construct a parser for this stream. 112 c := streamConfig{ 113 name: d.Name, 114 template: logpb.ButlerLogBundle_Entry{ 115 Desc: d, 116 }, 117 maximumBufferDuration: b.c.MaxBufferDelay, 118 maximumBufferedBytes: b.c.MaxBufferedBytes, 119 onAppend: func(appended bool) { 120 if appended { 121 b.signalStreamUpdate() 122 } 123 }, 124 } 125 126 err := error(nil) 127 c.parser, err = newParser(d, &b.prefixCounter) 128 if err != nil { 129 return nil, fmt.Errorf("failed to create stream parser: %s", err) 130 } 131 132 b.streamsLock.Lock() 133 defer b.streamsLock.Unlock() 134 135 // Ensure that this is not a duplicate stream name. 136 if s := b.streams[d.Name]; s != nil { 137 return nil, fmt.Errorf("a Stream is already registered for %q", d.Name) 138 } 139 140 // Create a new stream. This will kick off its processing goroutine, which 141 // will not stop until it is closed. 142 s := newStream(c) 143 b.registerStreamLocked(s) 144 return s, nil 145 } 146 147 // GetStreamDescs returns the set of registered stream names mapped to their 148 // descriptors. 149 // 150 // This is intended for testing purposes. DO NOT modify the resulting 151 // descriptors. 152 func (b *Bundler) GetStreamDescs() map[string]*logpb.LogStreamDescriptor { 153 b.streamsLock.Lock() 154 defer b.streamsLock.Unlock() 155 156 if len(b.streams) == 0 { 157 return nil 158 } 159 160 streams := make(map[string]*logpb.LogStreamDescriptor, len(b.streams)) 161 for k, s := range b.streams { 162 streams[k] = s.streamDesc() 163 } 164 return streams 165 } 166 167 // CloseAndFlush closes the Bundler, alerting it that no more streams will be 168 // added and that existing data may be aggressively output. 169 // 170 // CloseAndFlush will block until all buffered data has been consumed. 171 func (b *Bundler) CloseAndFlush() { 172 // Mark that we're flushing. This will cause us to perform more aggressive 173 // bundling in Next(). 174 b.startFlushing() 175 <-b.finishedC 176 } 177 178 // Next returns the next bundle, blocking until it is available. 179 func (b *Bundler) Next() *logpb.ButlerLogBundle { 180 return <-b.bundleC 181 } 182 183 func (b *Bundler) startFlushing() { 184 b.streamsLock.Lock() 185 defer b.streamsLock.Unlock() 186 187 if !b.flushing { 188 b.flushing = true 189 } 190 b.signalStreamUpdateLocked() 191 } 192 193 // makeBundles is run in its own goroutine. It runs continuously, responding 194 // to Stream constraints and availability and sending ButlerLogBundles through 195 // bundleC when available. 196 // 197 // makeBundles will terminate when closeC is closed and all streams are drained. 198 func (b *Bundler) makeBundles() { 199 defer close(b.finishedC) 200 defer close(b.bundleC) 201 202 b.streamsLock.Lock() 203 defer b.streamsLock.Unlock() 204 205 var bb *builder 206 defer func() { 207 if bb != nil && bb.hasContent() { 208 b.bundleC <- bb.bundle() 209 } 210 }() 211 212 for { 213 bb = &builder{ 214 size: b.c.MaxBundleSize, 215 template: logpb.ButlerLogBundle{ 216 Timestamp: timestamppb.New(b.getClock().Now()), 217 }, 218 } 219 var oldestContentTime time.Time 220 221 for { 222 state := b.getStreamStateLocked() 223 224 // Attempt to create more bundles. 225 sendNow := b.bundleRoundLocked(bb, state) 226 227 // Prune and unregister any drained streams. 228 state.forEachStream(func(s bundlerStream) bool { 229 if s.isDrained() { 230 state.removeStream(s.name()) 231 b.unregisterStreamLocked(s) 232 } 233 234 return true 235 }) 236 237 if b.flushing && len(b.streams) == 0 { 238 // We're flushing, and there are no more registered streams, so we're 239 // completely finished. 240 // 241 // If we have any content in our builder, it will be exported via defer. 242 return 243 } 244 245 // If we have content, consider emitting this bundle. 246 if bb.hasContent() && (b.c.MaxBufferDelay == 0 || sendNow || bb.ready()) { 247 break 248 } 249 250 // Mark the first time this round where we actually saw data. 251 if oldestContentTime.IsZero() && bb.hasContent() { 252 oldestContentTime = state.now 253 } 254 255 // We will yield our stream lock and sleep, waiting for either: 256 // 1) The earliest expiration time. 257 // 2) A streams channel signal. 258 // 259 // We use a Cond here because we want Streams to be able to be added 260 // while we're waiting for stream data. 261 nextExpire, has := state.nextExpire() 262 263 // If we have an oldest content time, that also means that we have 264 // content. Factor this constraint in. 265 if !oldestContentTime.IsZero() { 266 roundExpire := oldestContentTime.Add(b.c.MaxBufferDelay) 267 if !roundExpire.After(state.now) { 268 break 269 } 270 271 if !has || roundExpire.Before(nextExpire) { 272 nextExpire = roundExpire 273 has = true 274 } 275 } 276 277 // If we had no data or expire constraints, wait indefinitely for 278 // something to change. 279 // 280 // This will release our state lock during switch execution. The lock will 281 // be held after the switch statement has finished. 282 switch { 283 case has && nextExpire.After(state.now): 284 // No immediate data, so block until the next known data expiration 285 // time. 286 cctx, cancel := context.WithDeadline(context.Background(), nextExpire) 287 b.streamsLock.Unlock() 288 select { 289 case <-b.streamsNotify: 290 case <-cctx.Done(): 291 } 292 b.streamsLock.Lock() 293 cancel() 294 295 case has: 296 // There is more data, and it has already expired, so go immediately. 297 break 298 299 default: 300 // No data, and no enqueued stream data, so block indefinitely until we 301 // get a signal. 302 b.streamsLock.Unlock() 303 <-b.streamsNotify 304 b.streamsLock.Lock() 305 } 306 } 307 308 // If our bundler has contents, send them. 309 if bb.hasContent() { 310 b.bundleC <- bb.bundle() 311 } 312 } 313 } 314 315 // Implements a single bundle building round. This incrementally adds data from 316 // the stream state to the supplied builder. 317 // 318 // This method will block until a suitable bundle is available. Availability 319 // is subject both to time and data constraints: 320 // - If buffered data, which is timestampped at ingest, has exceeded its 321 // buffer duration threshold, a Bundle will be cut immediately. 322 // - If no data is set to expire, the Bundler may wait for more data to 323 // produce a more optimally-packed bundle. 324 // 325 // At a high level, Next operates as follows: 326 // 327 // 1. Freeze all stream state. 328 // 329 // 2. Scan streams for data that has exceeded its threshold; if data is found: 330 // - Aggressively pack expired data into a Bundle until the stream is 331 // drained (which will be unregistered later) or can't generate a new 332 // bundle entry with the current data in the stream buffer (e.g. only 333 // partial size header exists in buffer). This will allow more data 334 // coming in when the stream is revisisted in the next bundle round. 335 // - Optimally pack the remainder of the Bundle with any available data. 336 // - Return the Bundle. 337 // 338 // 3. Examine the remaining data sizes, waiting for either: 339 // - Enough stream data to fill our Bundle. 340 // - Our timeout, if the Bundler is not closed. 341 // 342 // 4. Pack a Bundle with the remaining data optimally, emphasizing streams 343 // with older data. 344 // 345 // Returns true if bundle some data was added that should be sent immediately. 346 func (b *Bundler) bundleRoundLocked(bb *builder, state *streamState) bool { 347 sendNow := false 348 349 // First pass: non-blocking data that has exceeded its storage threshold. 350 for bb.remaining() > 0 { 351 s := state.next() 352 if s == nil || s.isDrained() { 353 break 354 } 355 356 if et, has := s.expireTime(); !has || et.After(state.now) { 357 // This stream (and all other streams, since we're sorted) expires in 358 // the future, so we're done with the first pass. 359 break 360 } 361 362 // Pull bundles from this stream. 363 if modified := s.nextBundleEntry(bb, true); modified { 364 state.streamUpdated(s.name()) 365 366 // We have at least one time-sensitive bundle, so send this round. 367 sendNow = true 368 } else { 369 // Remove the stream from current stream snapshot, the stream will be 370 // skipped in this round to allow more data coming in. 371 state.removeStream(s.name()) 372 } 373 374 if s.isDrained() { 375 state.removeStream(s.name()) 376 b.unregisterStreamLocked(s) 377 } 378 } 379 380 // Second pass: bundle any available data. 381 state.forEachStream(func(s bundlerStream) bool { 382 if bb.remaining() == 0 { 383 return false 384 } 385 386 if modified := s.nextBundleEntry(bb, b.flushing); modified { 387 state.streamUpdated(s.name()) 388 } 389 return true 390 }) 391 392 return sendNow 393 } 394 395 func (b *Bundler) getStreamStateLocked() *streamState { 396 // Lock and collect each stream. 397 state := &streamState{ 398 streams: make([]bundlerStream, 0, len(b.streams)), 399 now: b.getClock().Now(), 400 } 401 402 for _, s := range b.streams { 403 state.streams = append(state.streams, s) 404 } 405 heap.Init(state) 406 407 return state 408 } 409 410 func (b *Bundler) registerStreamLocked(s bundlerStream) { 411 b.streams[s.name()] = s 412 b.signalStreamUpdateLocked() 413 } 414 415 func (b *Bundler) unregisterStreamLocked(s bundlerStream) { 416 delete(b.streams, s.name()) 417 } 418 419 func (b *Bundler) signalStreamUpdate() { 420 b.streamsLock.Lock() 421 defer b.streamsLock.Unlock() 422 423 b.signalStreamUpdateLocked() 424 } 425 426 func (b *Bundler) signalStreamUpdateLocked() { 427 select { 428 case b.streamsNotify <- struct{}{}: 429 default: 430 } 431 } 432 433 func (b *Bundler) getClock() clock.Clock { 434 c := b.c.Clock 435 if c != nil { 436 return c 437 } 438 return clock.GetSystemClock() 439 } 440 441 // streamState is a snapshot of the current stream registration. All operations 442 // performed on the state require streamLock to be held. 443 // 444 // streamState implements heap.Interface for its streams array. Streams without 445 // data times (nil) are considered to be greater than those with times. 446 type streamState struct { 447 streams []bundlerStream 448 now time.Time 449 } 450 451 var _ heap.Interface = (*streamState)(nil) 452 453 func (s *streamState) next() bundlerStream { 454 if len(s.streams) == 0 { 455 return nil 456 } 457 return s.streams[0] 458 } 459 460 func (s *streamState) nextExpire() (time.Time, bool) { 461 if next := s.next(); next != nil { 462 if ts, ok := next.expireTime(); ok { 463 return ts, true 464 } 465 } 466 return time.Time{}, false 467 } 468 469 func (s *streamState) streamUpdated(name string) { 470 if si, idx := s.streamIndex(name); si != nil { 471 heap.Fix(s, idx) 472 } 473 } 474 475 func (s *streamState) forEachStream(f func(bundlerStream) bool) { 476 // Clone our streams, since the callback may mutate their order. 477 streams := make([]bundlerStream, len(s.streams)) 478 for i, s := range s.streams { 479 streams[i] = s 480 } 481 482 for _, s := range streams { 483 if !f(s) { 484 break 485 } 486 } 487 } 488 489 // removeStream removes a stream from the stream state. 490 func (s *streamState) removeStream(name string) bundlerStream { 491 if si, idx := s.streamIndex(name); si != nil { 492 heap.Remove(s, idx) 493 return si 494 } 495 return nil 496 } 497 498 func (s *streamState) streamIndex(name string) (bundlerStream, int) { 499 for i, si := range s.streams { 500 if si.name() == name { 501 return si, i 502 } 503 } 504 return nil, -1 505 } 506 507 func (s *streamState) Len() int { 508 return len(s.streams) 509 } 510 511 func (s *streamState) Less(i, j int) bool { 512 si, sj := s.streams[i], s.streams[j] 513 514 if it, ok := si.expireTime(); ok { 515 if jt, ok := sj.expireTime(); ok { 516 return it.Before(jt) 517 } 518 519 // i has data, but j does not, so i < j. 520 return true 521 } 522 523 // i has no data, so i us greater than all other streams. 524 return false 525 } 526 527 func (s *streamState) Swap(i, j int) { 528 s.streams[i], s.streams[j] = s.streams[j], s.streams[i] 529 } 530 531 func (s *streamState) Push(x any) { 532 s.streams = append(s.streams, x.(bundlerStream)) 533 } 534 535 func (s *streamState) Pop() any { 536 last := s.streams[len(s.streams)-1] 537 s.streams = s.streams[:len(s.streams)-1] 538 return last 539 }