go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/logdog/common/archive/archive.go (about)

     1  // Copyright 2015 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package archive constructs a LogDog archive out of log stream components.
    16  // Records are read from the stream and emitted as an archive.
    17  package archive
    18  
    19  import (
    20  	"crypto/sha256"
    21  	"encoding/hex"
    22  	"io"
    23  	"reflect"
    24  
    25  	cl "cloud.google.com/go/logging"
    26  	"github.com/golang/protobuf/proto"
    27  
    28  	"go.chromium.org/luci/common/data/recordio"
    29  	"go.chromium.org/luci/common/logging"
    30  	"go.chromium.org/luci/common/sync/parallel"
    31  	"go.chromium.org/luci/logdog/api/logpb"
    32  	"go.chromium.org/luci/logdog/common/renderer"
    33  )
    34  
    35  // CloudLogging entry has a limit of 256KB in the internal byte representation.
    36  // If an entry is larger, CloudLogging will reject the entry with an error.
    37  //
    38  // To minimize the chance of a LogEntry exceeding the limit, Archive applies
    39  // the following limits to the entry before exporting logpb.LogEntry
    40  // to CloudLogging.
    41  const (
    42  	// maxPayload is the maximum size for the payload of a CloudLogging entry.
    43  	//
    44  	// If a single line exceeds the limit in size, the line likely contains
    45  	// a dump of a serialized object, which wouldn't be useful in searches, and
    46  	// the line will get truncated when being exported to CloudLogging.
    47  	maxPayload = 128 * 1024
    48  
    49  	// maxTagSum is the maximum size sum of tag keys and values that can be
    50  	// attached to a CloudLogging Entry. If the sum exceeds the limit,
    51  	// the stream won't be exported to CloudLogging.
    52  	maxTagSum = 96 * 1024
    53  )
    54  
// CLLogger is a general interface for a CloudLogging logger. It exists so that
// unit tests can stub out CloudLogging.
type CLLogger interface {
	// Log hands a single entry to the logger. It has no return value, so
	// callers receive no delivery status through this interface.
	Log(cl.Entry)
}
    60  
// Manifest is a set of archival parameters. It is passed by value to Archive,
// which describes a single log stream archival run.
type Manifest struct {
	// LUCIProject is the LUCI project for the stream. Together with the
	// descriptor's Prefix and Name it is hashed to form the stream ID used for
	// Cloud Logging export.
	LUCIProject string

	// Desc is the logpb.LogStreamDescriptor for the stream. Archive
	// dereferences it unconditionally, so it must not be nil when LogWriter is
	// set.
	Desc *logpb.LogStreamDescriptor
	// Source is the LogEntry Source for the stream.
	Source renderer.Source

	// LogWriter, if not nil, is the Writer to which the log stream record stream
	// will be written.
	//
	// If it is nil, Archive returns immediately without reading Source or
	// writing an index.
	LogWriter io.Writer
	// IndexWriter, if not nil, is the Writer to which the log stream Index
	// protobuf stream will be written.
	IndexWriter io.Writer

	// StreamIndexRange, if >0, is the maximum number of log entry stream indices
	// in between successive index entries.
	//
	// If no index constraints are set (StreamIndexRange, PrefixIndexRange and
	// ByteRange are all <=0), an index entry will be emitted for each LogEntry.
	StreamIndexRange int
	// PrefixIndexRange, if >0, is the maximum number of log entry prefix indices
	// in between successive index entries.
	PrefixIndexRange int
	// ByteRange, if >0, is the maximum number of log entry bytes in between
	// successive index entries.
	ByteRange int

	// Logger, if not nil, will be used to log status during archival.
	Logger logging.Logger

	// CloudLogger, if not nil, will be used to export archived log entries to
	// Cloud Logging.
	CloudLogger CLLogger

	// sizeFunc is a size method override used for testing. It is handed to the
	// index builder when an index is being constructed.
	sizeFunc func(proto.Message) int
}
   101  
   102  func (m *Manifest) logger() logging.Logger {
   103  	if m.Logger == nil ||
   104  		(reflect.ValueOf(m.Logger).Kind() == reflect.Ptr &&
   105  			reflect.ValueOf(m.Logger).IsNil()) {
   106  		return logging.Null
   107  	}
   108  	return m.Logger
   109  }
   110  
   111  // Archive performs the log archival described in the supplied Manifest.
   112  func Archive(m Manifest) error {
   113  	// Wrap our log source in a safeLogEntrySource to protect our index order.
   114  	m.Source = &safeLogEntrySource{
   115  		Manifest: &m,
   116  		Source:   m.Source,
   117  	}
   118  
   119  	// If no constraints are applied, index every LogEntry.
   120  	if m.StreamIndexRange <= 0 && m.PrefixIndexRange <= 0 && m.ByteRange <= 0 {
   121  		m.StreamIndexRange = 1
   122  	}
   123  
   124  	if m.LogWriter == nil {
   125  		return nil
   126  	}
   127  
   128  	// If we're constructing an index, allocate a stateful index builder.
   129  	var idx *indexBuilder
   130  	if m.IndexWriter != nil {
   131  		idx = &indexBuilder{
   132  			Manifest: &m,
   133  			index: logpb.LogIndex{
   134  				Desc: m.Desc,
   135  			},
   136  			sizeFunc: m.sizeFunc,
   137  		}
   138  	}
   139  
   140  	// Compute a hash to be used as the ID of the stream in Cloud Logging.
   141  	sha := sha256.New()
   142  	sha.Write([]byte(m.LUCIProject))
   143  	sha.Write([]byte(m.Desc.Prefix))
   144  	sha.Write([]byte(m.Desc.Name))
   145  	streamIDHash := sha.Sum(nil)
   146  
   147  	return parallel.FanOutIn(func(taskC chan<- func() error) {
   148  		logC := make(chan *logpb.LogEntry)
   149  
   150  		taskC <- func() error {
   151  			if err := archiveLogs(m.LogWriter, m.Desc, logC, idx, m.CloudLogger, streamIDHash, m.logger()); err != nil {
   152  				return err
   153  			}
   154  
   155  			// If we're building an index, emit it now that the log stream has
   156  			// finished.
   157  			if idx != nil {
   158  				return idx.emit(m.IndexWriter)
   159  			}
   160  			return nil
   161  		}
   162  
   163  		// Iterate through all of our Source's logs and process them.
   164  		taskC <- func() error {
   165  			defer close(logC)
   166  
   167  			for {
   168  				le, err := m.Source.NextLogEntry()
   169  				if le != nil {
   170  					logC <- le
   171  				}
   172  
   173  				switch err {
   174  				case nil:
   175  				case io.EOF:
   176  					return nil
   177  				default:
   178  					return err
   179  				}
   180  			}
   181  		}
   182  	})
   183  }
   184  
   185  func archiveLogs(w io.Writer, d *logpb.LogStreamDescriptor, logC <-chan *logpb.LogEntry, idx *indexBuilder, cloudLogger CLLogger, streamIDHash []byte, logger logging.Logger) error {
   186  	offset := int64(0)
   187  	out := func(pb proto.Message) error {
   188  		d, err := proto.Marshal(pb)
   189  		if err != nil {
   190  			return err
   191  		}
   192  
   193  		count, err := recordio.WriteFrame(w, d)
   194  		offset += int64(count)
   195  		return err
   196  	}
   197  
   198  	isCLDisabled := (cloudLogger == nil ||
   199  		(reflect.ValueOf(cloudLogger).Kind() == reflect.Ptr &&
   200  			reflect.ValueOf(cloudLogger).IsNil()))
   201  
   202  	if !isCLDisabled {
   203  		tsum := 0
   204  		for k, v := range d.GetTags() {
   205  			tsum += len(k)
   206  			tsum += len(v)
   207  
   208  			if tsum > maxTagSum {
   209  				logger.Errorf("sum(tags) > %d; skipping the stream for CloudLogging export", maxTagSum)
   210  				isCLDisabled = true
   211  				break
   212  			}
   213  		}
   214  	}
   215  	// Start with our descriptor protobuf. Defer error handling until later, as
   216  	// we are still responsible for draining "logC".
   217  	err := out(d)
   218  
   219  	eb := newEntryBuffer(maxPayload, hex.EncodeToString(streamIDHash), d)
   220  	for le := range logC {
   221  		if err != nil {
   222  			continue
   223  		}
   224  
   225  		// Add this LogEntry to our index, noting the current offset.
   226  		if idx != nil {
   227  			idx.addLogEntry(le, offset)
   228  		}
   229  		err = out(le)
   230  
   231  		// Skip CloudLogging export, if disabled.
   232  		if isCLDisabled {
   233  			continue
   234  		}
   235  		for _, entry := range eb.append(le) {
   236  			cloudLogger.Log(*entry)
   237  		}
   238  	}
   239  
   240  	// Export the last entry.
   241  	//
   242  	// If there was an error, the buffered line can possibly contain
   243  	// an incomplete line, which was going to be completed in the next LogEntry.
   244  	//
   245  	// If so, skip flushing out the buffered line to prevent the complete
   246  	// version of the line from being deduped on the next retry of
   247  	// the archival task.
   248  	if err == nil {
   249  		if entry := eb.flush(); entry != nil {
   250  			cloudLogger.Log(*entry)
   251  		}
   252  	}
   253  	return err
   254  }