go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/logdog/client/butler/bundler/bundler.go (about)

     1  // Copyright 2015 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package bundler
    16  
    17  import (
    18  	"container/heap"
    19  	"context"
    20  	"fmt"
    21  	"sync"
    22  	"time"
    23  
    24  	"google.golang.org/protobuf/types/known/timestamppb"
    25  
    26  	"go.chromium.org/luci/common/clock"
    27  	"go.chromium.org/luci/logdog/api/logpb"
    28  )
    29  
// Config is the Bundler configuration.
//
// New takes the Config by value and keeps its own copy, so later changes to
// the caller's value are not observed by the Bundler.
type Config struct {
	// Clock is the clock instance that will be used for Bundler and stream
	// timing.
	//
	// If nil, the system clock is used.
	Clock clock.Clock

	// MaxBufferedBytes is the maximum number of bytes to buffer in memory per
	// stream. It is handed to every stream created by Register.
	MaxBufferedBytes int64

	// MaxBundleSize is the maximum bundle size in bytes that may be generated.
	//
	// If this value is zero, no size constraint will be applied to generated
	// bundles.
	MaxBundleSize int

	// MaxBufferDelay is the maximum amount of time we're willing to buffer
	// bundled data. Other factors can cause the bundle to be sent before this,
	// but it is an upper bound.
	//
	// If this value is zero, a bundle is emitted as soon as it has any content.
	MaxBufferDelay time.Duration
}
    51  
// bundlerStream is the view of a registered stream that the bundling loop
// relies on.
type bundlerStream interface {
	// isDrained reports whether the stream is finished. Drained streams are
	// unregistered from the Bundler.
	isDrained() bool
	// name returns the key under which the stream is registered.
	name() string
	// expireTime returns the time at which the stream's buffered data is
	// considered expired. The boolean is false if the stream currently has no
	// such time.
	expireTime() (time.Time, bool)
	// nextBundleEntry is asked to add stream data to the supplied builder and
	// reports whether the builder was modified. The Bundler passes true for the
	// boolean when the stream's data has expired and while flushing.
	// NOTE(review): presumably the flag requests aggressive output; confirm
	// against the stream implementation.
	nextBundleEntry(*builder, bool) bool
	// streamDesc returns the stream's log stream descriptor.
	streamDesc() *logpb.LogStreamDescriptor
}
    59  
// Bundler is the main Bundler instance. It exposes goroutine-safe endpoints for
// stream registration and bundle consumption.
//
// A Bundler must be created with New, which also starts the makeBundles
// goroutine that produces the bundles returned by Next.
type Bundler struct {
	// c is the Bundler's private copy of the configuration passed to New.
	c *Config

	// finishedC is closed when makeBundles goroutine has terminated.
	finishedC chan struct{}
	// bundleC is the unbuffered channel on which makeBundles delivers completed
	// bundles to Next. It is closed when makeBundles terminates.
	bundleC chan *logpb.ButlerLogBundle

	// streamsLock is a lock around the `streams` map and its contents. You must
	// also hold this lock in order to push into streamsNotify.
	streamsLock sync.Mutex
	// streamsNotify has a buffer size of 1 and acts as a select-able semaphore.
	// It wakes makeBundles when streams are registered, stream data is appended,
	// or flushing begins.
	streamsNotify chan struct{}
	// streams is the set of currently-registered Streams, keyed by stream name.
	streams map[string]bundlerStream
	// flushing is set once CloseAndFlush() has been called. It is never cleared.
	flushing bool

	// prefixCounter is a global counter for Prefix-wide streams. It is shared
	// with every stream parser created by Register.
	prefixCounter counter
}
    82  
    83  // New instantiates a new Bundler instance.
    84  func New(c Config) *Bundler {
    85  	b := Bundler{
    86  		c:             &c,
    87  		finishedC:     make(chan struct{}),
    88  		bundleC:       make(chan *logpb.ButlerLogBundle),
    89  		streams:       map[string]bundlerStream{},
    90  		streamsNotify: make(chan struct{}, 1),
    91  	}
    92  
    93  	go b.makeBundles()
    94  	return &b
    95  }
    96  
    97  // Register adds a new stream to the Bundler, returning a reference to the
    98  // registered stream.
    99  //
   100  // The Bundler takes ownership of the supplied Properties, and may modify them
   101  // as needed.
   102  func (b *Bundler) Register(d *logpb.LogStreamDescriptor) (Stream, error) {
   103  	// Our Properties must validate.
   104  	if err := d.Validate(false); err != nil {
   105  		return nil, err
   106  	}
   107  
   108  	// Enforce that the log stream descriptor's Prefix is empty.
   109  	d.Prefix = ""
   110  
   111  	// Construct a parser for this stream.
   112  	c := streamConfig{
   113  		name: d.Name,
   114  		template: logpb.ButlerLogBundle_Entry{
   115  			Desc: d,
   116  		},
   117  		maximumBufferDuration: b.c.MaxBufferDelay,
   118  		maximumBufferedBytes:  b.c.MaxBufferedBytes,
   119  		onAppend: func(appended bool) {
   120  			if appended {
   121  				b.signalStreamUpdate()
   122  			}
   123  		},
   124  	}
   125  
   126  	err := error(nil)
   127  	c.parser, err = newParser(d, &b.prefixCounter)
   128  	if err != nil {
   129  		return nil, fmt.Errorf("failed to create stream parser: %s", err)
   130  	}
   131  
   132  	b.streamsLock.Lock()
   133  	defer b.streamsLock.Unlock()
   134  
   135  	// Ensure that this is not a duplicate stream name.
   136  	if s := b.streams[d.Name]; s != nil {
   137  		return nil, fmt.Errorf("a Stream is already registered for %q", d.Name)
   138  	}
   139  
   140  	// Create a new stream. This will kick off its processing goroutine, which
   141  	// will not stop until it is closed.
   142  	s := newStream(c)
   143  	b.registerStreamLocked(s)
   144  	return s, nil
   145  }
   146  
   147  // GetStreamDescs returns the set of registered stream names mapped to their
   148  // descriptors.
   149  //
   150  // This is intended for testing purposes. DO NOT modify the resulting
   151  // descriptors.
   152  func (b *Bundler) GetStreamDescs() map[string]*logpb.LogStreamDescriptor {
   153  	b.streamsLock.Lock()
   154  	defer b.streamsLock.Unlock()
   155  
   156  	if len(b.streams) == 0 {
   157  		return nil
   158  	}
   159  
   160  	streams := make(map[string]*logpb.LogStreamDescriptor, len(b.streams))
   161  	for k, s := range b.streams {
   162  		streams[k] = s.streamDesc()
   163  	}
   164  	return streams
   165  }
   166  
// CloseAndFlush closes the Bundler, alerting it that no more streams will be
// added and that existing data may be aggressively output.
//
// CloseAndFlush will block until all buffered data has been consumed. More
// precisely, it waits for the makeBundles goroutine to terminate, which
// happens once no registered streams remain. Because bundles are delivered
// over an unbuffered channel, something must keep calling Next while
// CloseAndFlush is blocked.
func (b *Bundler) CloseAndFlush() {
	// Mark that we're flushing. This will cause the makeBundles goroutine to
	// perform more aggressive bundling, and wakes it up if it is waiting.
	b.startFlushing()
	// Wait for makeBundles to finish.
	<-b.finishedC
}
   177  
// Next returns the next bundle, blocking until it is available.
//
// Once the bundling goroutine has terminated and its output channel has been
// closed (see CloseAndFlush), Next returns nil.
func (b *Bundler) Next() *logpb.ButlerLogBundle {
	return <-b.bundleC
}
   182  
   183  func (b *Bundler) startFlushing() {
   184  	b.streamsLock.Lock()
   185  	defer b.streamsLock.Unlock()
   186  
   187  	if !b.flushing {
   188  		b.flushing = true
   189  	}
   190  	b.signalStreamUpdateLocked()
   191  }
   192  
// makeBundles is run in its own goroutine. It runs continuously, responding
// to Stream constraints and availability and sending ButlerLogBundles through
// bundleC when available.
//
// makeBundles will terminate once the Bundler is flushing (CloseAndFlush has
// been called) and no registered streams remain.
//
// streamsLock is held for the whole function except while waiting for a
// notification or deadline. Note that sends on bundleC are performed with the
// lock held, so they block registration and stream signalling until a
// consumer calls Next.
func (b *Bundler) makeBundles() {
	// Deferred calls run in reverse order: the final bundle (if any) is sent,
	// the lock is released, bundleC is closed and finally finishedC is closed.
	defer close(b.finishedC)
	defer close(b.bundleC)

	b.streamsLock.Lock()
	defer b.streamsLock.Unlock()

	var bb *builder
	defer func() {
		// Export whatever was accumulated in the in-progress builder at the time
		// of termination.
		if bb != nil && bb.hasContent() {
			b.bundleC <- bb.bundle()
		}
	}()

	// Each iteration of the outer loop builds and emits one bundle.
	for {
		bb = &builder{
			size: b.c.MaxBundleSize,
			template: logpb.ButlerLogBundle{
				Timestamp: timestamppb.New(b.getClock().Now()),
			},
		}
		// oldestContentTime is the snapshot time of the first round in which this
		// builder was seen to hold content; zero until then.
		var oldestContentTime time.Time

		// Each iteration of the inner loop is one bundling round over a fresh
		// snapshot of the registered streams.
		for {
			state := b.getStreamStateLocked()

			// Attempt to add more stream data to the bundle.
			sendNow := b.bundleRoundLocked(bb, state)

			// Prune and unregister any drained streams.
			state.forEachStream(func(s bundlerStream) bool {
				if s.isDrained() {
					state.removeStream(s.name())
					b.unregisterStreamLocked(s)
				}

				return true
			})

			if b.flushing && len(b.streams) == 0 {
				// We're flushing, and there are no more registered streams, so we're
				// completely finished.
				//
				// If we have any content in our builder, it will be exported via defer.
				return
			}

			// If we have content, consider emitting this bundle.
			if bb.hasContent() && (b.c.MaxBufferDelay == 0 || sendNow || bb.ready()) {
				break
			}

			// Mark the first time this round where we actually saw data.
			if oldestContentTime.IsZero() && bb.hasContent() {
				oldestContentTime = state.now
			}

			// We will yield our stream lock and sleep, waiting for either:
			// 1) The earliest expiration time.
			// 2) A streams channel signal.
			//
			// The lock is released while waiting so that Streams can be added, and
			// can signal new data, while we're waiting for stream data.
			nextExpire, has := state.nextExpire()

			// If we have an oldest content time, that also means that we have
			// content. Factor this constraint in.
			if !oldestContentTime.IsZero() {
				roundExpire := oldestContentTime.Add(b.c.MaxBufferDelay)
				if !roundExpire.After(state.now) {
					// The content in the builder has been held for the maximum delay,
					// so emit the bundle now.
					break
				}

				if !has || roundExpire.Before(nextExpire) {
					nextExpire = roundExpire
					has = true
				}
			}

			// If we had no data or expire constraints, wait indefinitely for
			// something to change.
			//
			// This will release our state lock during switch execution. The lock will
			// be held after the switch statement has finished.
			switch {
			case has && nextExpire.After(state.now):
				// No immediate data, so block until the next known data expiration
				// time.
				//
				// NOTE(review): the context deadline is measured against the real
				// wall clock, whereas state.now comes from the configured Clock;
				// confirm this is intended when a non-system Clock is supplied.
				cctx, cancel := context.WithDeadline(context.Background(), nextExpire)
				b.streamsLock.Unlock()
				select {
				case <-b.streamsNotify:
				case <-cctx.Done():
				}
				b.streamsLock.Lock()
				cancel()

			case has:
				// There is more data, and it has already expired, so go immediately.
				// This break only leaves the switch; the next round starts right away.
				break

			default:
				// No data, and no enqueued stream data, so block indefinitely until we
				// get a signal.
				b.streamsLock.Unlock()
				<-b.streamsNotify
				b.streamsLock.Lock()
			}
		}

		// If our bundler has contents, send them. This blocks, with streamsLock
		// held, until the bundle is received.
		if bb.hasContent() {
			b.bundleC <- bb.bundle()
		}
	}
}
   314  
   315  // Implements a single bundle building round. This incrementally adds data from
   316  // the stream state to the supplied builder.
   317  //
   318  // This method will block until a suitable bundle is available. Availability
   319  // is subject both to time and data constraints:
   320  //   - If buffered data, which is timestampped at ingest, has exceeded its
   321  //     buffer duration threshold, a Bundle will be cut immediately.
   322  //   - If no data is set to expire, the Bundler may wait for more data to
   323  //     produce a more optimally-packed bundle.
   324  //
   325  // At a high level, Next operates as follows:
   326  //
   327  //  1. Freeze all stream state.
   328  //
   329  //  2. Scan streams for data that has exceeded its threshold; if data is found:
   330  //     - Aggressively pack expired data into a Bundle until the stream is
   331  //     drained (which will be unregistered later) or can't generate a new
   332  //     bundle entry with the current data in the stream buffer (e.g. only
   333  //     partial size header exists in buffer). This will allow more data
   334  //     coming in when the stream is revisisted in the next bundle round.
   335  //     - Optimally pack the remainder of the Bundle with any available data.
   336  //     - Return the Bundle.
   337  //
   338  //  3. Examine the remaining data sizes, waiting for either:
   339  //     - Enough stream data to fill our Bundle.
   340  //     - Our timeout, if the Bundler is not closed.
   341  //
   342  //  4. Pack a Bundle with the remaining data optimally, emphasizing streams
   343  //     with older data.
   344  //
   345  // Returns true if bundle some data was added that should be sent immediately.
   346  func (b *Bundler) bundleRoundLocked(bb *builder, state *streamState) bool {
   347  	sendNow := false
   348  
   349  	// First pass: non-blocking data that has exceeded its storage threshold.
   350  	for bb.remaining() > 0 {
   351  		s := state.next()
   352  		if s == nil || s.isDrained() {
   353  			break
   354  		}
   355  
   356  		if et, has := s.expireTime(); !has || et.After(state.now) {
   357  			// This stream (and all other streams, since we're sorted) expires in
   358  			// the future, so we're done with the first pass.
   359  			break
   360  		}
   361  
   362  		// Pull bundles from this stream.
   363  		if modified := s.nextBundleEntry(bb, true); modified {
   364  			state.streamUpdated(s.name())
   365  
   366  			// We have at least one time-sensitive bundle, so send this round.
   367  			sendNow = true
   368  		} else {
   369  			// Remove the stream from current stream snapshot, the stream will be
   370  			// skipped in this round to allow more data coming in.
   371  			state.removeStream(s.name())
   372  		}
   373  
   374  		if s.isDrained() {
   375  			state.removeStream(s.name())
   376  			b.unregisterStreamLocked(s)
   377  		}
   378  	}
   379  
   380  	// Second pass: bundle any available data.
   381  	state.forEachStream(func(s bundlerStream) bool {
   382  		if bb.remaining() == 0 {
   383  			return false
   384  		}
   385  
   386  		if modified := s.nextBundleEntry(bb, b.flushing); modified {
   387  			state.streamUpdated(s.name())
   388  		}
   389  		return true
   390  	})
   391  
   392  	return sendNow
   393  }
   394  
   395  func (b *Bundler) getStreamStateLocked() *streamState {
   396  	// Lock and collect each stream.
   397  	state := &streamState{
   398  		streams: make([]bundlerStream, 0, len(b.streams)),
   399  		now:     b.getClock().Now(),
   400  	}
   401  
   402  	for _, s := range b.streams {
   403  		state.streams = append(state.streams, s)
   404  	}
   405  	heap.Init(state)
   406  
   407  	return state
   408  }
   409  
// registerStreamLocked adds s to the set of registered streams under its name,
// replacing any stream already stored under that name, and wakes the bundling
// goroutine. streamsLock must be held.
func (b *Bundler) registerStreamLocked(s bundlerStream) {
	b.streams[s.name()] = s
	b.signalStreamUpdateLocked()
}
   414  
// unregisterStreamLocked removes the entry stored under s's name from the set
// of registered streams. It is a no-op if there is no such entry. streamsLock
// must be held.
func (b *Bundler) unregisterStreamLocked(s bundlerStream) {
	delete(b.streams, s.name())
}
   418  
// signalStreamUpdate wakes the bundling goroutine. It acquires streamsLock
// itself, so it must not be called with the lock already held; use
// signalStreamUpdateLocked in that case.
func (b *Bundler) signalStreamUpdate() {
	b.streamsLock.Lock()
	defer b.streamsLock.Unlock()

	b.signalStreamUpdateLocked()
}
   425  
// signalStreamUpdateLocked posts a wake-up to streamsNotify without blocking.
// If a wake-up is already pending the new one is dropped, so multiple signals
// coalesce into one.
func (b *Bundler) signalStreamUpdateLocked() {
	select {
	case b.streamsNotify <- struct{}{}:
	default:
		// A notification is already queued; nothing more to do.
	}
}
   432  
   433  func (b *Bundler) getClock() clock.Clock {
   434  	c := b.c.Clock
   435  	if c != nil {
   436  		return c
   437  	}
   438  	return clock.GetSystemClock()
   439  }
   440  
// streamState is a snapshot of the current stream registration. All operations
// performed on the state require the Bundler's streamsLock to be held.
//
// streamState implements heap.Interface for its streams array. Streams that
// report no expiration time are considered to be greater than those with
// times, so the stream with the earliest expiration is at the head.
type streamState struct {
	// streams is the heap-ordered set of streams in this snapshot.
	streams []bundlerStream
	// now is the time at which the snapshot was taken.
	now time.Time
}

// Compile-time check that *streamState satisfies heap.Interface.
var _ heap.Interface = (*streamState)(nil)
   452  
   453  func (s *streamState) next() bundlerStream {
   454  	if len(s.streams) == 0 {
   455  		return nil
   456  	}
   457  	return s.streams[0]
   458  }
   459  
   460  func (s *streamState) nextExpire() (time.Time, bool) {
   461  	if next := s.next(); next != nil {
   462  		if ts, ok := next.expireTime(); ok {
   463  			return ts, true
   464  		}
   465  	}
   466  	return time.Time{}, false
   467  }
   468  
   469  func (s *streamState) streamUpdated(name string) {
   470  	if si, idx := s.streamIndex(name); si != nil {
   471  		heap.Fix(s, idx)
   472  	}
   473  }
   474  
   475  func (s *streamState) forEachStream(f func(bundlerStream) bool) {
   476  	// Clone our streams, since the callback may mutate their order.
   477  	streams := make([]bundlerStream, len(s.streams))
   478  	for i, s := range s.streams {
   479  		streams[i] = s
   480  	}
   481  
   482  	for _, s := range streams {
   483  		if !f(s) {
   484  			break
   485  		}
   486  	}
   487  }
   488  
   489  // removeStream removes a stream from the stream state.
   490  func (s *streamState) removeStream(name string) bundlerStream {
   491  	if si, idx := s.streamIndex(name); si != nil {
   492  		heap.Remove(s, idx)
   493  		return si
   494  	}
   495  	return nil
   496  }
   497  
   498  func (s *streamState) streamIndex(name string) (bundlerStream, int) {
   499  	for i, si := range s.streams {
   500  		if si.name() == name {
   501  			return si, i
   502  		}
   503  	}
   504  	return nil, -1
   505  }
   506  
// Len implements heap.Interface; it returns the number of streams in the
// snapshot.
func (s *streamState) Len() int {
	return len(s.streams)
}
   510  
   511  func (s *streamState) Less(i, j int) bool {
   512  	si, sj := s.streams[i], s.streams[j]
   513  
   514  	if it, ok := si.expireTime(); ok {
   515  		if jt, ok := sj.expireTime(); ok {
   516  			return it.Before(jt)
   517  		}
   518  
   519  		// i has data, but j does not, so i < j.
   520  		return true
   521  	}
   522  
   523  	// i has no data, so i us greater than all other streams.
   524  	return false
   525  }
   526  
// Swap implements heap.Interface; it exchanges the streams at positions i and
// j.
func (s *streamState) Swap(i, j int) {
	s.streams[i], s.streams[j] = s.streams[j], s.streams[i]
}
   530  
// Push implements heap.Interface; it appends x to the stream list. x must be a
// bundlerStream, otherwise the type assertion panics.
func (s *streamState) Push(x any) {
	s.streams = append(s.streams, x.(bundlerStream))
}
   534  
   535  func (s *streamState) Pop() any {
   536  	last := s.streams[len(s.streams)-1]
   537  	s.streams = s.streams[:len(s.streams)-1]
   538  	return last
   539  }