// Source file: go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/logdog/common/fetcher/fetcher.go

// Copyright 2015 The LUCI Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package fetcher

import (
	"context"
	"errors"
	"fmt"
	"io"
	"time"

	"github.com/golang/protobuf/proto"
	"go.chromium.org/luci/common/clock"
	log "go.chromium.org/luci/common/logging"
	"go.chromium.org/luci/logdog/api/logpb"
	"go.chromium.org/luci/logdog/common/renderer"
	"go.chromium.org/luci/logdog/common/types"
)

const (
	// DefaultDelay is the default Delay value.
	DefaultDelay = 5 * time.Second

	// DefaultBufferBytes is the default number of bytes to buffer.
	DefaultBufferBytes = int64(1024 * 1024) // 1MB
)

// LogRequest is a structure used by the Fetcher to request a range of logs
// from its Source.
type LogRequest struct {
	// Index is the starting log index to request.
	Index types.MessageIndex
	// Count, if >0, is the maximum number of log entries to request.
	Count int
	// Bytes, if >0, is the maximum number of log bytes to request. At least one
	// log must be returned regardless of the byte limit.
	Bytes int64
}

// Source is the source of log stream and log information.
//
// The Source is responsible for handling retries, backoff, and transient
// errors. An error from the Source will shut down the Fetcher.
type Source interface {
	// LogEntries returns sequential log entries for the supplied LogRequest, as
	// they become available.
	//
	// This may optionally block pending new log entries, but may also return zero
	// log entries if none are available yet.
	//
	// Upon success, the requested logs and the terminal message index are
	// returned. If no terminal index is known, a value <0 will be returned.
	LogEntries(context.Context, *LogRequest) ([]*logpb.LogEntry, types.MessageIndex, error)

	// Descriptor returns the stream's descriptor, if the source knows it.
	//
	// If the source doesn't have this information, this should return nil.
	Descriptor() *logpb.LogStreamDescriptor
}
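
// sliceSource is a minimal sketch of a Source implementation, included purely
// for illustration; it is not part of the LogDog API, and the name is
// hypothetical. It serves a fixed, in-memory slice of LogEntry records whose
// StreamIndex values are assumed to match their slice positions, and it
// reports the last entry as the terminal index. A production Source would
// typically wrap a LogDog Coordinator client and handle its own retries,
// since the Fetcher treats any returned error as fatal. The Bytes limit is
// ignored here, which is permitted: at least one log must be returned
// regardless of the byte limit.
type sliceSource struct {
	entries []*logpb.LogEntry
	desc    *logpb.LogStreamDescriptor
}

func (s *sliceSource) LogEntries(c context.Context, req *LogRequest) ([]*logpb.LogEntry, types.MessageIndex, error) {
	tidx := types.MessageIndex(len(s.entries) - 1)
	if req.Index > tidx {
		// Requesting past the end of the stream; nothing to return.
		return nil, tidx, nil
	}

	logs := s.entries[req.Index:]
	if req.Count > 0 && len(logs) > req.Count {
		logs = logs[:req.Count]
	}
	return logs, tidx, nil
}

func (s *sliceSource) Descriptor() *logpb.LogStreamDescriptor { return s.desc }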

// Options is the set of configuration parameters for a Fetcher.
type Options struct {
	// Source is the log stream source.
	Source Source

	// Index is the starting stream index to retrieve.
	Index types.MessageIndex
	// Count is the total number of logs to retrieve. If zero, the full stream
	// will be fetched.
	Count int64

	// BufferCount is the minimum number of LogEntry records to buffer. If the
	// buffered amount dips below BufferCount, more logs will be fetched.
	//
	// If zero, no count target will be applied.
	BufferCount int

	// BufferBytes is the target number of LogEntry bytes to buffer. If the
	// buffered amount dips below BufferBytes, more logs will be fetched.
	//
	// If zero, no byte target will be applied unless BufferCount is also zero,
	// in which case the DefaultBufferBytes byte constraint will be applied.
	BufferBytes int64

	// PrefetchFactor controls how much additional data to fetch when refilling
	// the buffer. The effective Count and Bytes values are multiplied by
	// PrefetchFactor to determine the amount of logs to request.
	PrefetchFactor int

	// Delay is the amount of time to wait between unsuccessful log requests.
	Delay time.Duration

	// RequireCompleteStream, if true, causes the Fetcher to immediately bail out
	// with ErrIncompleteStream if the stream isn't complete yet. This can be
	// useful when you believe the stream to already be terminal, but haven't yet
	// done an RPC with LogDog to actually confirm this.
	RequireCompleteStream bool

	// sizeFunc is a function that calculates the byte size of a LogEntry
	// protobuf.
	//
	// If nil, proto.Size will be used. This is used for testing.
	sizeFunc func(proto.Message) int
}

// ErrIncompleteStream is returned by Fetcher if Options.RequireCompleteStream
// was true and the underlying Stream is still incomplete (i.e. has not yet
// been terminated by the client, or archived).
var ErrIncompleteStream = errors.New("stream has not yet terminated")

// A Fetcher buffers LogEntry records by querying the Source for log data.
// It attempts to maintain a steady stream of records by prefetching available
// records in advance of consumption.
//
// A Fetcher is not goroutine-safe.
type Fetcher struct {
	// o is the set of Options.
	o *Options

	// logC is the communication channel between the fetch goroutine and
	// the user-facing NextLogEntry. Logs are punted through this channel
	// one-by-one.
	logC chan *logResponse
	// fetchErr is the retained error state. If not nil, fetching has stopped and
	// all Fetcher methods will return this error.
	fetchErr error
}

// New instantiates a new Fetcher instance.
//
// The Fetcher can be cancelled by cancelling the supplied context.
func New(c context.Context, o Options) *Fetcher {
	if o.Delay <= 0 {
		o.Delay = DefaultDelay
	}
	if o.BufferBytes <= 0 && o.BufferCount <= 0 {
		o.BufferBytes = DefaultBufferBytes
	}
	if o.PrefetchFactor <= 1 {
		o.PrefetchFactor = 1
	}

	f := Fetcher{
		o:    &o,
		logC: make(chan *logResponse, o.BufferCount),
	}
	go f.fetch(c)
	return &f
}
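
// newBoundedFetcher is a configuration sketch, not part of the package API;
// the function name and the specific values below are illustrative
// assumptions. It builds a Fetcher that starts at index 0, delivers at most
// 1000 entries, keeps at least 64 entries buffered (prefetching twice that
// target when refilling), and fails fast with ErrIncompleteStream if the
// stream has not terminated yet.
func newBoundedFetcher(c context.Context, src Source) *Fetcher {
	return New(c, Options{
		Source:                src,
		Index:                 0,
		Count:                 1000,
		BufferCount:           64,
		PrefetchFactor:        2,
		RequireCompleteStream: true,
	})
}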

// logResponse is a single unit sent from the fetch goroutine to NextLogEntry:
// either one buffered log entry (with its size and index) or a terminal error.
type logResponse struct {
	log   *logpb.LogEntry
	size  int64
	index types.MessageIndex
	err   error
}

// NextLogEntry returns the next buffered LogEntry, blocking until it becomes
// available.
//
// If the end of the log stream is encountered, NextLogEntry will return
// io.EOF.
//
// If the Fetcher is cancelled, a context.Canceled error will be returned.
func (f *Fetcher) NextLogEntry() (*logpb.LogEntry, error) {
	var le *logpb.LogEntry
	if f.fetchErr == nil {
		resp := <-f.logC
		if resp.err != nil {
			f.fetchErr = resp.err
		}
		if resp.log != nil {
			le = resp.log
		}
	}
	return le, f.fetchErr
}
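
// fetchAllEntries is a usage sketch, not part of the package API; the function
// name is illustrative. It drains a Fetcher built from the supplied Source,
// collecting every LogEntry until the stream ends with io.EOF. Cancelling the
// Context aborts the fetch, and the context error is returned here.
func fetchAllEntries(c context.Context, src Source) ([]*logpb.LogEntry, error) {
	f := New(c, Options{Source: src})

	var entries []*logpb.LogEntry
	for {
		le, err := f.NextLogEntry()
		switch {
		case err == io.EOF:
			// The full log stream has been delivered.
			return entries, nil
		case err != nil:
			return nil, err
		default:
			entries = append(entries, le)
		}
	}
}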

type fetchRequest struct {
	index types.MessageIndex
	count int
	bytes int64
	respC chan *fetchResponse
}

type fetchResponse struct {
	logs []*logpb.LogEntry
	tidx types.MessageIndex
	err  error
}

// fetch is run in a separate goroutine. It handles buffering and punts logs
// and/or error state to NextLogEntry via logC.
func (f *Fetcher) fetch(c context.Context) {
	defer close(f.logC)

	lb := logBuffer{}
	nextFetchIndex := f.o.Index
	lastSentIndex := types.MessageIndex(-1)
	tidx := types.MessageIndex(-1)

	c, cancelFunc := context.WithCancel(c)
	bytes := int64(0)
	logFetchBaseC := make(chan *fetchResponse)
	var logFetchC chan *fetchResponse
	defer func() {
		// Reap our fetch goroutine, if still fetching.
		if logFetchC != nil {
			cancelFunc()
			<-logFetchC
		}
		close(logFetchBaseC)
	}()

	errOut := func(err error) {
		f.logC <- &logResponse{err: err}
	}

	count := int64(0)
	for tidx < 0 || lastSentIndex < tidx {
		// If we're configured with an upper delivery bound and we've delivered
		// that many entries, we're done.
		remaining := int64(-1)
		if f.o.Count > 0 {
			remaining = f.o.Count - count
			if remaining <= 0 {
				log.Fields{
					"count": count,
				}.Debugf(c, "Exceeded maximum log count.")
				break
			}
		}

		// If we have a buffered log, prepare it for sending.
		var sendC chan<- *logResponse
		var lr *logResponse
		if le := lb.current(); le != nil {
			lr = &logResponse{
				log:   le,
				size:  f.sizeOf(le),
				index: types.MessageIndex(le.StreamIndex),
			}
			sendC = f.logC
		}

		// If we're not currently fetching logs, and we are below our thresholds,
		// request a new batch of logs.
		if logFetchC == nil && (tidx < 0 || nextFetchIndex <= tidx) {

			// How far below our count and byte buffering targets are we?
			fetchCount := f.applyConstraint(int64(f.o.BufferCount), int64(lb.size()))
			fetchBytes := f.applyConstraint(f.o.BufferBytes, bytes)

			// Apply maximum log count constraint, if one is specified.
			if remaining >= 0 {
				// Factor in our currently-buffered logs.
				remaining -= int64(lb.size())

				switch {
				case remaining <= 0:
					// No more to fetch, we've buffered all the logs we're going to need.
					fetchCount = -1
				case fetchCount == 0, remaining < fetchCount:
					fetchCount = remaining
				}
			}

			// Fetch logs if neither constraint has vetoed.
			if fetchCount >= 0 && fetchBytes >= 0 {
				logFetchC = logFetchBaseC
				req := fetchRequest{
					index: nextFetchIndex,
					count: int(fetchCount),
					bytes: fetchBytes,
					respC: logFetchC,
				}

				log.Fields{
					"index": req.index,
					"count": req.count,
					"bytes": req.bytes,
				}.Debugf(c, "Buffering next round of logs...")
				go f.fetchLogs(c, &req)
			}
		}

		select {
		case <-c.Done():
			// Our Context has been cancelled. Terminate and propagate that error.
			log.WithError(c.Err()).Debugf(c, "Context has been cancelled.")
			errOut(c.Err())
			return

		case resp := <-logFetchC:
			logFetchC = nil
			if resp.err != nil {
				log.WithError(resp.err).Errorf(c, "Error fetching logs.")
				errOut(resp.err)
				return
			}

			if f.o.RequireCompleteStream && resp.tidx == -1 {
				errOut(ErrIncompleteStream)
				return
			}

			// Account for each log and add it to our buffer.
			if len(resp.logs) > 0 {
				for _, le := range resp.logs {
					bytes += int64(f.sizeOf(le))
				}
				nextFetchIndex = types.MessageIndex(resp.logs[len(resp.logs)-1].StreamIndex) + 1
				lb.append(resp.logs...)
			}
			if resp.tidx >= 0 {
				tidx = resp.tidx
			}

		case sendC <- lr:
			bytes -= lr.size
			lastSentIndex = lr.index

			count++
			lb.next()
		}
	}

	// We've punted the full log stream.
	errOut(io.EOF)
}

// fetchLogs is run in its own goroutine per fetch request. It polls the Source
// until it returns an error, returns log entries, or the request is past the
// terminal index, sleeping Delay between empty rounds, and delivers the result
// on req.respC.
func (f *Fetcher) fetchLogs(c context.Context, req *fetchRequest) {
	resp := fetchResponse{}
	defer func() {
		if r := recover(); r != nil {
			resp.err = fmt.Errorf("panic during fetch: %v", r)
		}
		req.respC <- &resp
	}()

	// Request the next chunk of logs from our Source.
	lreq := LogRequest{
		Index: req.index,
		Count: req.count,
		Bytes: req.bytes,
	}
	if lreq.Count < 0 {
		lreq.Count = 0
	}
	if lreq.Bytes < 0 {
		lreq.Bytes = 0
	}

	for {
		log.Fields{
			"index": req.index,
		}.Debugf(c, "Fetching logs.")
		resp.logs, resp.tidx, resp.err = f.o.Source.LogEntries(c, &lreq)
		switch {
		case resp.err != nil:
			log.Fields{
				log.ErrorKey: resp.err,
				"index":      req.index,
			}.Errorf(c, "Fetch returned error.")
			return

		case len(resp.logs) > 0:
			log.Fields{
				"index":         req.index,
				"count":         len(resp.logs),
				"terminalIndex": resp.tidx,
				"firstLog":      resp.logs[0].StreamIndex,
				"lastLog":       resp.logs[len(resp.logs)-1].StreamIndex,
			}.Debugf(c, "Fetched log entries.")
			return

		case resp.tidx >= 0 && req.index > resp.tidx:
			// Will never be satisfied, since we're requesting beyond the terminal
			// index.
			return

		default:
			// No logs this round. Sleep for more.
			log.Fields{
				"index": req.index,
				"delay": f.o.Delay,
			}.Infof(c, "No logs returned. Sleeping...")
			if tr := clock.Sleep(c, f.o.Delay); tr.Incomplete() {
				log.WithError(tr.Err).Warningf(c, "Context was canceled.")
				resp.err = tr.Err
				return
			}
		}
	}
}

// sizeOf returns the size, in bytes, of the supplied LogEntry, using the
// configured sizeFunc (proto.Size by default).
func (f *Fetcher) sizeOf(le *logpb.LogEntry) int64 {
	sf := f.o.sizeFunc
	if sf == nil {
		sf = proto.Size
	}
	return int64(sf(le))
}

// applyConstraint compares a buffering target (want) against the amount
// currently buffered (have). It returns 0 if there is no constraint, -1 if the
// buffer already meets the target, and otherwise the amount to fetch, scaled
// up by PrefetchFactor. For example, with a target of 100 entries, a
// PrefetchFactor of 2, and 40 entries buffered, it returns (100*2)-40 = 160.
func (f *Fetcher) applyConstraint(want, have int64) int64 {
	switch {
	case want <= 0:
		// No constraint, unbounded.
		return 0

	case want > have:
		// We have fewer logs buffered. Fetch the difference (including
		// prefetch).
		return (want * int64(f.o.PrefetchFactor)) - have

	default:
		// We're within our constraint. Do not fetch logs.
		return -1
	}
}

// Reader returns an io.Reader (a *renderer.Renderer) for this Fetcher.
func (f *Fetcher) Reader() io.Reader {
	return &renderer.Renderer{
		Source: f,
		Raw:    true,
	}
}
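
// dumpToWriter is a sketch of using Reader, not part of the package API; the
// function name is illustrative. io.Copy reads rendered raw log data from the
// Fetcher until it reports io.EOF, which io.Copy treats as a clean end of
// stream (returning a nil error); any other fetch error is returned as-is.
func dumpToWriter(w io.Writer, f *Fetcher) error {
	_, err := io.Copy(w, f.Reader())
	return err
}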

// Descriptor returns the last-known Descriptor from the underlying Stream, if
// known.
//
// If the underlying stream doesn't have this information, this returns nil.
func (f *Fetcher) Descriptor() *logpb.LogStreamDescriptor {
	return f.o.Source.Descriptor()
}