go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/logdog/client/butler/bundler/stream.go (about)

     1  // Copyright 2015 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package bundler
    16  
    17  import (
    18  	"fmt"
    19  	"sync"
    20  	"sync/atomic"
    21  	"time"
    22  
    23  	"go.chromium.org/luci/logdog/api/logpb"
    24  )
    25  
    26  var (
    27  	// dataBufferSize is the size (in bytes) of the Data objects that a Stream
    28  	// will lease.
    29  	dataBufferSize = 4096
    30  )
    31  
// Stream is an individual Bundler Stream. Data is added to the Stream as a
// series of ordered binary chunks.
//
// A Stream is not goroutine-safe.
type Stream interface {
	// LeaseData allocates and returns a Data block that stream data can be
	// loaded into. The caller should Release() the Data, or transfer ownership
	// of it to something that will (e.g., Append()).
	//
	// Failing to Release leased Data is merely inefficient, not fatal.
	LeaseData() Data

	// Append adds a sequential chunk of data to the Stream. Append may block if
	// the data isn't ready to be consumed.
	//
	// Append takes ownership of the data regardless of whether or not it returns
	// an error. The supplied Data must not be referenced after calling Append.
	Append(Data) error

	// Close closes the Stream, flushing any remaining data.
	Close()
}
    54  
// streamConfig is the set of static configuration parameters for the stream.
type streamConfig struct {
	// name is the name of this stream.
	name string

	// parser is the stream parser to use.
	parser parser

	// maximumBufferedBytes is the maximum number of bytes that this stream will
	// retain in its parser before blocking subsequent Append attempts.
	//
	// Append always accepts data when the parser's buffer is empty, so a single
	// chunk may push the buffered total past this limit.
	maximumBufferedBytes int64
	// maximumBufferDuration is the maximum amount of time that a block of data
	// can be comfortably buffered in the stream. expireTime adds it to the
	// timestamp of the oldest buffered chunk.
	maximumBufferDuration time.Duration

	// template is the minimally-populated Butler log bundle entry.
	template logpb.ButlerLogBundle_Entry

	// onAppend, if not nil, is invoked when an attempt to append data to the
	// stream occurs. If true is passed, the data was successfully appended. If
	// false was passed, the data could not be appended immediately and the stream
	// will block pending data consumption.
	//
	// Append invokes it after every attempt, without holding the stream's
	// parserLock or stateLock. closeLocked also invokes it, passing true, from a
	// new goroutine after the stream has been marked closed.
	onAppend func(bool)
}
    81  
// streamImpl is a Stream implementation that is bound to a Bundler.
type streamImpl struct {
	// c is the stream's static configuration.
	c *streamConfig

	// drained is true if the stream is finished emitting data, including its
	// terminal state.
	//
	// It is an atomic value, with zero indicating not drained and non-zero
	// indicating drained. It should be accessed via isDrained, and set with
	// setDrained.
	drained int32

	// parserLock is a Mutex protecting the stream's parser instance and its
	// underlying chunk.Buffer. Any access to either of these fields must be done
	// while holding this lock.
	parserLock sync.Mutex

	// dataConsumedSignalC is a channel that can be used to signal when data has
	// been consumed. It is signalled (without blocking) via signalDataConsumed
	// and received from by Append while it waits for buffer space.
	dataConsumedSignalC chan struct{}

	// stateLock protects stream state against concurrent access.
	stateLock sync.Mutex

	// closed, if true, indicates that we have been closed and our stream has
	// finished reading.
	//
	// stateLock must be held when accessing this field.
	closed bool

	// lastLogEntry is a pointer to the last LogEntry that was exported.
	//
	// stateLock must be held when accessing this field.
	lastLogEntry *logpb.LogEntry

	// appendErr is the error that should be returned by Append. It is set when
	// stream content processing hits a fatal state.
	//
	// The methods in this file read and write it with stateLock held.
	appendErr error
}
   121  
   122  func newStream(c streamConfig) *streamImpl {
   123  	return &streamImpl{
   124  		c: &c,
   125  
   126  		dataConsumedSignalC: make(chan struct{}, 1),
   127  	}
   128  }
   129  
   130  func (s *streamImpl) LeaseData() Data {
   131  	return globalDataPoolRegistry.getPool(dataBufferSize).getData()
   132  }
   133  
   134  func (s *streamImpl) Append(d Data) error {
   135  	// Block/loop until we've successfully appended the data.
   136  	for {
   137  		dLen := int64(len(d.Bytes()))
   138  		if err := s.appendError(); err != nil || dLen == 0 {
   139  			d.Release()
   140  			return err
   141  		}
   142  
   143  		s.withParserLock(func() error {
   144  			if s.c.parser.bufferedBytes() == 0 ||
   145  				s.c.parser.bufferedBytes()+dLen <= s.c.maximumBufferedBytes {
   146  				s.c.parser.appendData(d)
   147  				d = nil
   148  			}
   149  			return nil
   150  		})
   151  
   152  		// The data was appended; we're done.
   153  		if s.c.onAppend != nil {
   154  			s.c.onAppend(d == nil)
   155  		}
   156  		if d == nil {
   157  			break
   158  		}
   159  
   160  		// Not ready to append; wait for a data event and re-evaluate.
   161  		<-s.dataConsumedSignalC
   162  	}
   163  
   164  	if d != nil {
   165  		d.Release()
   166  	}
   167  	return nil
   168  }
   169  
   170  // Signals our Append loop that data has been consumed.
   171  func (s *streamImpl) signalDataConsumed() {
   172  	select {
   173  	case s.dataConsumedSignalC <- struct{}{}:
   174  		break
   175  
   176  	default:
   177  		break
   178  	}
   179  }
   180  
   181  func (s *streamImpl) Close() {
   182  	s.stateLock.Lock()
   183  	defer s.stateLock.Unlock()
   184  	s.closeLocked()
   185  }
   186  
   187  func (s *streamImpl) closeLocked() {
   188  	s.closed = true
   189  	if s.c.onAppend != nil {
   190  		// If anyone is listening, notify that our state has changed; it doesn't
   191  		// actually matter WHEN this state notification happens, just that it
   192  		// happens after closed=true.
   193  		//
   194  		// The current implementation of Bundler has this as b.signalStreamUpdate(),
   195  		// which is synchronized with Bundler.streamsLock so doing this without
   196  		// a goroutine can lead to deadlock.
   197  		go s.c.onAppend(true)
   198  	}
   199  }
   200  
   201  func (s *streamImpl) name() string {
   202  	return s.c.name
   203  }
   204  
   205  // isDrained returns true if this stream is finished emitting data, including
   206  // its terminal state.
   207  //
   208  // This can happen if either:
   209  // - The stream is closed and has no more buffered data, or
   210  // - The stream has encountered a fatal error during processing.
   211  func (s *streamImpl) isDrained() bool {
   212  	return atomic.LoadInt32(&s.drained) != 0
   213  }
   214  
   215  // setDrained marks this stream as drained.
   216  func (s *streamImpl) setDrained() {
   217  	atomic.StoreInt32(&s.drained, 1)
   218  }
   219  
   220  // noMoreDataLocked returns true if our stream has been closed and its buffer
   221  // is empty.
   222  //
   223  // The stream's stateLock must be held when calling this method.
   224  func (s *streamImpl) noMoreDataLocked() bool {
   225  	if !s.closed {
   226  		return false
   227  	}
   228  
   229  	// If we have an append error, we will no longer accept or consume data.
   230  	if s.appendErr != nil {
   231  		return true
   232  	}
   233  
   234  	var bufSize int64
   235  	s.withParserLock(func() error {
   236  		bufSize = s.c.parser.bufferedBytes()
   237  		return nil
   238  	})
   239  	return bufSize == 0
   240  }
   241  
   242  // expireTime returns the Time when the oldest chunk in the stream will expire.
   243  //
   244  // This is calculated ask:
   245  // oldest.Timestamp + stream.maximumBufferDuration
   246  // If there is no buffered data, oldest will return nil.
   247  func (s *streamImpl) expireTime() (t time.Time, has bool) {
   248  	s.withParserLock(func() error {
   249  		t, has = s.c.parser.firstChunkTime()
   250  		return nil
   251  	})
   252  
   253  	if has {
   254  		t = t.Add(s.c.maximumBufferDuration)
   255  	}
   256  	return
   257  }
   258  
   259  // nextBundleEntry generates bundles for this stream. The total bundle data size
   260  // must not exceed the supplied size.
   261  //
   262  // If no bundle entry could be generated given the constraints, nil will be
   263  // returned.
   264  //
   265  // It is possible for some entries to be returned alongside an error.
   266  func (s *streamImpl) nextBundleEntry(bb *builder, aggressive bool) bool {
   267  	s.stateLock.Lock()
   268  	defer s.stateLock.Unlock()
   269  
   270  	// If we're not drained, try and get the next bundle.
   271  	modified := false
   272  	if !s.noMoreDataLocked() {
   273  		err := error(nil)
   274  		modified, err = s.nextBundleEntryLocked(bb, aggressive)
   275  		if err != nil {
   276  			s.setAppendErrorLocked(err)
   277  		}
   278  
   279  		if modified {
   280  			s.signalDataConsumed()
   281  		}
   282  	}
   283  
   284  	// If we're drained, populate our terminal state.
   285  	if s.noMoreDataLocked() {
   286  		if s.lastLogEntry != nil {
   287  			bb.setStreamTerminal(&s.c.template, s.lastLogEntry.StreamIndex)
   288  		}
   289  		s.setDrained()
   290  	}
   291  
   292  	return modified
   293  }
   294  
   295  func (s *streamImpl) nextBundleEntryLocked(bb *builder, aggressive bool) (bool, error) {
   296  	c := constraints{
   297  		allowSplit: aggressive,
   298  		closed:     s.closed,
   299  	}
   300  
   301  	// Extract as many entries as possible from the stream. As we extract, adjust
   302  	// our byte size.
   303  	//
   304  	// If we're closed, this will continue to consume until finished. If an error
   305  	// occurs, shut down data collection.
   306  	modified := false
   307  
   308  	for c.limit = bb.remaining(); c.limit > 0; c.limit = bb.remaining() {
   309  		emittedLog := false
   310  		err := s.withParserLock(func() error {
   311  			le, err := s.c.parser.nextEntry(&c)
   312  			if err != nil {
   313  				return err
   314  			}
   315  
   316  			if le == nil {
   317  				return nil
   318  			}
   319  
   320  			// Enforce basic log entry consistency.
   321  			if err := s.fixupLogEntry(s.lastLogEntry, le); err != nil {
   322  				return err
   323  			}
   324  
   325  			emittedLog = true
   326  			modified = true
   327  
   328  			bb.add(&s.c.template, le)
   329  			s.lastLogEntry = le
   330  			return nil
   331  		})
   332  
   333  		if err != nil || !emittedLog {
   334  			return modified, err
   335  		}
   336  	}
   337  	return modified, nil
   338  }
   339  
   340  // fixupLogEntry asserts and corrects a log entry's stream offset and ordering
   341  // given the previous entry in the stream.
   342  //
   343  // If prev is nil, that means that cur is expected to be the first log entry
   344  // in the stream.
   345  func (s *streamImpl) fixupLogEntry(prev, cur *logpb.LogEntry) error {
   346  	if prev == nil {
   347  		if cur.StreamIndex != 0 {
   348  			return fmt.Errorf("first log entry is not zero index (%d)", cur.StreamIndex)
   349  		}
   350  	} else {
   351  		if cur.StreamIndex != prev.StreamIndex+1 {
   352  			return fmt.Errorf("non-contiguous stream indices (%d != %d)", cur.StreamIndex, prev.StreamIndex+1)
   353  		}
   354  
   355  		if cur.TimeOffset.AsDuration() < prev.TimeOffset.AsDuration() {
   356  			to := *prev.TimeOffset
   357  			cur.TimeOffset = &to
   358  		}
   359  	}
   360  
   361  	return nil
   362  }
   363  
   364  func (s *streamImpl) withParserLock(f func() error) error {
   365  	s.parserLock.Lock()
   366  	defer s.parserLock.Unlock()
   367  
   368  	return f()
   369  }
   370  
   371  func (s *streamImpl) appendError() error {
   372  	s.stateLock.Lock()
   373  	defer s.stateLock.Unlock()
   374  
   375  	return s.appendErr
   376  }
   377  
   378  func (s *streamImpl) setAppendErrorLocked(err error) {
   379  	s.appendErr = err
   380  
   381  	s.closeLocked()
   382  	s.signalDataConsumed()
   383  }
   384  
   385  func (s *streamImpl) streamDesc() *logpb.LogStreamDescriptor {
   386  	return s.c.template.Desc
   387  }