go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/logdog/common/archive/archive_test.go (about)

     1  // Copyright 2015 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package archive
    16  
    17  import (
    18  	"bytes"
    19  	"crypto/sha256"
    20  	"fmt"
    21  	"io"
    22  	"strconv"
    23  	"testing"
    24  	"time"
    25  
    26  	cl "cloud.google.com/go/logging"
    27  	"github.com/golang/protobuf/proto"
    28  	"google.golang.org/protobuf/types/known/durationpb"
    29  	"google.golang.org/protobuf/types/known/timestamppb"
    30  
    31  	"go.chromium.org/luci/common/clock/testclock"
    32  	"go.chromium.org/luci/common/data/recordio"
    33  	"go.chromium.org/luci/common/errors"
    34  	"go.chromium.org/luci/common/iotools"
    35  	"go.chromium.org/luci/logdog/api/logpb"
    36  
    37  	. "github.com/smartystreets/goconvey/convey"
    38  
    39  	. "go.chromium.org/luci/common/testing/assertions"
    40  )
    41  
    42  func gen(i int) *logpb.LogEntry {
    43  	return &logpb.LogEntry{
    44  		TimeOffset:  durationpb.New(time.Duration(i) * time.Second),
    45  		PrefixIndex: uint64(i) * 2,
    46  		StreamIndex: uint64(i),
    47  		Sequence:    uint64(i),
    48  		Content: &logpb.LogEntry_Text{
    49  			Text: &logpb.Text{
    50  				Lines: []*logpb.Text_Line{
    51  					{Value: []byte(strconv.Itoa(i)), Delimiter: "\n"},
    52  				},
    53  			},
    54  		},
    55  	}
    56  }
    57  
    58  type testSource struct {
    59  	logs []*logpb.LogEntry
    60  	err  error
    61  }
    62  
    63  func (s *testSource) addLogEntry(le *logpb.LogEntry) {
    64  	s.logs = append(s.logs, le)
    65  }
    66  
    67  func (s *testSource) add(indices ...int) {
    68  	for _, i := range indices {
    69  		s.addLogEntry(gen(i))
    70  	}
    71  }
    72  
    73  func (s *testSource) addEntries(entries ...*logpb.LogEntry) {
    74  	s.logs = append(s.logs, entries...)
    75  }
    76  
    77  func (s *testSource) NextLogEntry() (le *logpb.LogEntry, err error) {
    78  	if err = s.err; err != nil {
    79  		return
    80  	}
    81  	if len(s.logs) == 0 {
    82  		err = io.EOF
    83  		return
    84  	}
    85  
    86  	le, s.logs = s.logs[0], s.logs[1:]
    87  	return
    88  }
    89  
    90  type errWriter struct {
    91  	io.Writer
    92  	err error
    93  }
    94  
    95  func (w *errWriter) Write(d []byte) (int, error) {
    96  	if w.err != nil {
    97  		return 0, w.err
    98  	}
    99  	return w.Writer.Write(d)
   100  }
   101  
   102  // testCLLogger implements CLLogger for testing and instrumentation.
   103  type testCLLogger struct {
   104  	entry []*cl.Entry
   105  }
   106  
   107  func (l *testCLLogger) Log(e cl.Entry) {
   108  	l.entry = append(l.entry, &e)
   109  }
   110  
   111  // indexParams umarshals an index from a byte stream and removes any entries.
   112  func indexParams(d []byte) *logpb.LogIndex {
   113  	var index logpb.LogIndex
   114  	if err := proto.Unmarshal(d, &index); err != nil {
   115  		panic(fmt.Errorf("failed to unmarshal index protobuf: %v", err))
   116  	}
   117  	index.Entries = nil
   118  	return &index
   119  }
   120  
   121  type indexChecker struct {
   122  	fixedSize int
   123  }
   124  
   125  func (ic *indexChecker) size(pb proto.Message) int {
   126  	if ic.fixedSize > 0 {
   127  		return ic.fixedSize
   128  	}
   129  	return proto.Size(pb)
   130  }
   131  
   132  // shouldContainIndexFor validates the correctness and completeness of the
   133  // supplied index.
   134  //
   135  // actual should be a *bytes.Buffer that contains a serialized index protobuf.
   136  //
   137  // expected[0] should be the log stream descriptor.
   138  // expected[1] should be a *bytes.Buffer that contains the log RecordIO stream.
   139  //
   140  // If additional expected elements are supplied, they are the specific integers
   141  // that should appear in the index. Otherwise, it is assumed that the log is a
   142  // complete index.
   143  func (ic *indexChecker) shouldContainIndexFor(actual any, expected ...any) string {
   144  	indexB := actual.(*bytes.Buffer)
   145  
   146  	if len(expected) < 2 {
   147  		return "at least two expected arguments are required"
   148  	}
   149  	desc := expected[0].(*logpb.LogStreamDescriptor)
   150  	logB := expected[1].(*bytes.Buffer)
   151  	expected = expected[2:]
   152  
   153  	// Load our log index.
   154  	index := logpb.LogIndex{}
   155  	if err := proto.Unmarshal(indexB.Bytes(), &index); err != nil {
   156  		return fmt.Sprintf("failed to unmarshal index protobuf: %v", err)
   157  	}
   158  
   159  	// Descriptors must match.
   160  	if err := ShouldResembleProto(index.Desc, desc); err != "" {
   161  		return err
   162  	}
   163  
   164  	// Catalog the log entries in "expected".
   165  	entries := map[uint64]*logpb.LogEntry{}
   166  	offsets := map[uint64]int64{}
   167  	csizes := map[uint64]uint64{}
   168  	var eidx []uint64
   169  
   170  	// Skip the first frame (descriptor).
   171  	cr := iotools.CountingReader{
   172  		Reader: logB,
   173  	}
   174  	csize := uint64(0)
   175  	r := recordio.NewReader(&cr, 1024*1024)
   176  	d, err := r.ReadFrameAll()
   177  	if err != nil {
   178  		return "failed to skip descriptor frame"
   179  	}
   180  	for {
   181  		offset := cr.Count
   182  		d, err = r.ReadFrameAll()
   183  		if err == io.EOF {
   184  			break
   185  		}
   186  		if err != nil {
   187  			return fmt.Sprintf("failed to read entry #%d: %v", len(entries), err)
   188  		}
   189  
   190  		le := logpb.LogEntry{}
   191  		if err := proto.Unmarshal(d, &le); err != nil {
   192  			return fmt.Sprintf("failed to unmarshal entry #%d: %v", len(entries), err)
   193  		}
   194  		entries[le.StreamIndex] = &le
   195  		offsets[le.StreamIndex] = offset
   196  		csizes[le.StreamIndex] = csize
   197  
   198  		csize += uint64(ic.size(&le))
   199  		eidx = append(eidx, le.StreamIndex)
   200  	}
   201  
   202  	// Determine our expected archive indexes.
   203  	if len(expected) > 0 {
   204  		eidx = make([]uint64, 0, len(expected))
   205  		for _, e := range expected {
   206  			eidx = append(eidx, uint64(e.(int)))
   207  		}
   208  	}
   209  
   210  	iidx := make([]uint64, len(index.Entries))
   211  	for i, e := range index.Entries {
   212  		iidx[i] = e.StreamIndex
   213  	}
   214  	if err := ShouldResemble(iidx, eidx); err != "" {
   215  		return err
   216  	}
   217  
   218  	for i, cur := range index.Entries {
   219  		cidx := eidx[i]
   220  		le, offset := entries[uint64(cidx)], offsets[cidx]
   221  		if le == nil {
   222  			return fmt.Sprintf("no log entry for stream index %d", cidx)
   223  		}
   224  
   225  		if cur.StreamIndex != le.StreamIndex {
   226  			return fmt.Sprintf("index entry %d has incorrect stream index (%d != %d)", i, cur.StreamIndex, le.StreamIndex)
   227  		}
   228  		if cur.Offset != uint64(offset) {
   229  			return fmt.Sprintf("index entry %d has incorrect offset (%d != %d)", i, cur.Offset, offset)
   230  		}
   231  		if cur.PrefixIndex != le.PrefixIndex {
   232  			return fmt.Sprintf("index entry %d has incorrect prefix index (%d != %d)", i, cur.StreamIndex, le.PrefixIndex)
   233  		}
   234  		if curOff, leOff := cur.TimeOffset.AsDuration(), le.TimeOffset.AsDuration(); curOff != leOff {
   235  			return fmt.Sprintf("index entry %d has incorrect time offset (%v != %v)", i, curOff, leOff)
   236  		}
   237  		if cur.Sequence != le.Sequence {
   238  			return fmt.Sprintf("index entry %d has incorrect sequence (%d != %d)", i, cur.Sequence, le.Sequence)
   239  		}
   240  	}
   241  	return ""
   242  }
   243  
   244  func TestArchive(t *testing.T) {
   245  	Convey(`A Manifest connected to Buffer Writers`, t, func() {
   246  		var logB, indexB bytes.Buffer
   247  		desc := &logpb.LogStreamDescriptor{
   248  			Prefix:    "test",
   249  			Name:      "foo",
   250  			Timestamp: timestamppb.New(testclock.TestTimeUTC),
   251  		}
   252  		ic := indexChecker{}
   253  		ts := testSource{}
   254  		m := Manifest{
   255  			LUCIProject: "chromium",
   256  			Desc:        desc,
   257  			Source:      &ts,
   258  			LogWriter:   &logB,
   259  			IndexWriter: &indexB,
   260  			CloudLogger: &testCLLogger{},
   261  		}
   262  
   263  		Convey(`A sequence of logs will build a complete index.`, func() {
   264  			ts.add(0, 1, 2, 3, 4, 5, 6)
   265  			So(Archive(m), ShouldBeNil)
   266  
   267  			So(&indexB, ic.shouldContainIndexFor, desc, &logB)
   268  			So(indexParams(indexB.Bytes()), ShouldResembleProto, &logpb.LogIndex{
   269  				Desc:            desc,
   270  				LastPrefixIndex: 12,
   271  				LastStreamIndex: 6,
   272  				LogEntryCount:   7,
   273  			})
   274  		})
   275  
   276  		Convey(`A sequence of non-contiguous logs will build a complete index.`, func() {
   277  			ts.add(0, 1, 3, 6)
   278  			So(Archive(m), ShouldBeNil)
   279  
   280  			So(&indexB, ic.shouldContainIndexFor, desc, &logB, 0, 1, 3, 6)
   281  			So(indexParams(indexB.Bytes()), ShouldResembleProto, &logpb.LogIndex{
   282  				Desc:            desc,
   283  				LastPrefixIndex: 12,
   284  				LastStreamIndex: 6,
   285  				LogEntryCount:   4,
   286  			})
   287  		})
   288  
   289  		Convey(`Out of order logs are ignored`, func() {
   290  			Convey(`When StreamIndex is out of order.`, func() {
   291  				ts.add(0, 2, 1, 3)
   292  				So(Archive(m), ShouldBeNil)
   293  
   294  				So(&indexB, ic.shouldContainIndexFor, desc, &logB, 0, 2, 3)
   295  				So(indexParams(indexB.Bytes()), ShouldResembleProto, &logpb.LogIndex{
   296  					Desc:            desc,
   297  					LastPrefixIndex: 6,
   298  					LastStreamIndex: 3,
   299  					LogEntryCount:   3,
   300  				})
   301  			})
   302  
   303  			Convey(`When PrefixIndex is out of order.`, func() {
   304  				ts.add(0, 1)
   305  				le := gen(2)
   306  				le.PrefixIndex = 1
   307  				ts.addEntries(le)
   308  				ts.add(3, 4)
   309  				So(Archive(m), ShouldBeNil)
   310  
   311  				So(&indexB, ic.shouldContainIndexFor, desc, &logB, 0, 1, 3, 4)
   312  				So(indexParams(indexB.Bytes()), ShouldResembleProto, &logpb.LogIndex{
   313  					Desc:            desc,
   314  					LastPrefixIndex: 8,
   315  					LastStreamIndex: 4,
   316  					LogEntryCount:   4,
   317  				})
   318  			})
   319  
   320  			Convey(`When Sequence is out of order.`, func() {
   321  				ts.add(0, 1)
   322  				le := gen(2)
   323  				le.Sequence = 0
   324  				ts.addEntries(le)
   325  				ts.add(3, 4)
   326  				So(Archive(m), ShouldBeNil)
   327  
   328  				So(&indexB, ic.shouldContainIndexFor, desc, &logB, 0, 1, 3, 4)
   329  				So(indexParams(indexB.Bytes()), ShouldResembleProto, &logpb.LogIndex{
   330  					Desc:            desc,
   331  					LastPrefixIndex: 8,
   332  					LastStreamIndex: 4,
   333  					LogEntryCount:   4,
   334  				})
   335  			})
   336  		})
   337  
   338  		Convey(`When TimeOffset is out of order, it is bumped to max.`, func() {
   339  			ts.add(0, 1)
   340  			le := gen(2)
   341  			le.TimeOffset = nil // 0
   342  			ts.addEntries(le)
   343  			ts.add(3, 4)
   344  			So(Archive(m), ShouldBeNil)
   345  
   346  			So(&indexB, ic.shouldContainIndexFor, desc, &logB, 0, 1, 2, 3, 4)
   347  			So(indexParams(indexB.Bytes()), ShouldResembleProto, &logpb.LogIndex{
   348  				Desc:            desc,
   349  				LastPrefixIndex: 8,
   350  				LastStreamIndex: 4,
   351  				LogEntryCount:   5,
   352  			})
   353  		})
   354  
   355  		Convey(`Source errors will be returned`, func() {
   356  			Convey(`nil LogEntry`, func() {
   357  				ts.addLogEntry(nil)
   358  				So(Archive(m), ShouldErrLike, "nil LogEntry")
   359  			})
   360  
   361  			Convey(`Error returned`, func() {
   362  				ts.add(0, 1, 2, 3, 4, 5)
   363  				ts.err = errors.New("test error")
   364  				So(Archive(m), ShouldErrLike, "test error")
   365  			})
   366  		})
   367  
   368  		Convey(`Writer errors will be returned`, func() {
   369  			ts.add(0, 1, 2, 3, 4, 5)
   370  
   371  			Convey(`For log writer errors.`, func() {
   372  				m.LogWriter = &errWriter{m.LogWriter, errors.New("test error")}
   373  				So(errors.SingleError(Archive(m)), ShouldErrLike, "test error")
   374  			})
   375  
   376  			Convey(`For index writer errors.`, func() {
   377  				m.IndexWriter = &errWriter{m.IndexWriter, errors.New("test error")}
   378  				So(errors.SingleError(Archive(m)), ShouldErrLike, "test error")
   379  			})
   380  
   381  			Convey(`When all Writers fail.`, func() {
   382  				m.LogWriter = &errWriter{m.LogWriter, errors.New("test error")}
   383  				m.IndexWriter = &errWriter{m.IndexWriter, errors.New("test error")}
   384  				So(Archive(m), ShouldNotBeNil)
   385  			})
   386  		})
   387  
   388  		Convey(`When building sparse index`, func() {
   389  			ts.add(0, 1, 2, 3, 4, 5)
   390  
   391  			Convey(`Can build an index for every 3 StreamIndex.`, func() {
   392  				m.StreamIndexRange = 3
   393  				So(Archive(m), ShouldBeNil)
   394  
   395  				So(&indexB, ic.shouldContainIndexFor, desc, &logB, 0, 3, 5)
   396  				So(indexParams(indexB.Bytes()), ShouldResembleProto, &logpb.LogIndex{
   397  					Desc:            desc,
   398  					LastPrefixIndex: 10,
   399  					LastStreamIndex: 5,
   400  					LogEntryCount:   6,
   401  				})
   402  			})
   403  
   404  			Convey(`Can build an index for every 3 PrefixIndex.`, func() {
   405  				m.PrefixIndexRange = 3
   406  				So(Archive(m), ShouldBeNil)
   407  
   408  				// Note that in our generated logs, PrefixIndex = 2*StreamIndex.
   409  				So(&indexB, ic.shouldContainIndexFor, desc, &logB, 0, 2, 4, 5)
   410  				So(indexParams(indexB.Bytes()), ShouldResembleProto, &logpb.LogIndex{
   411  					Desc:            desc,
   412  					LastPrefixIndex: 10,
   413  					LastStreamIndex: 5,
   414  					LogEntryCount:   6,
   415  				})
   416  			})
   417  
   418  			Convey(`Can build an index for every 13 bytes.`, func() {
   419  				ic.fixedSize = 5
   420  				m.ByteRange = 13
   421  				m.sizeFunc = func(pb proto.Message) int {
   422  					// Stub all LogEntry to be 5 bytes.
   423  					return 5
   424  				}
   425  				So(Archive(m), ShouldBeNil)
   426  
   427  				So(&indexB, ic.shouldContainIndexFor, desc, &logB, 0, 2, 5)
   428  				So(indexParams(indexB.Bytes()), ShouldResembleProto, &logpb.LogIndex{
   429  					Desc:            desc,
   430  					LastPrefixIndex: 10,
   431  					LastStreamIndex: 5,
   432  					LogEntryCount:   6,
   433  				})
   434  			})
   435  		})
   436  
   437  		Convey(`Exports entries to Cloud Logs`, func() {
   438  			clogger := m.CloudLogger.(*testCLLogger)
   439  			line := func(msg, del string) *logpb.Text_Line {
   440  				return &logpb.Text_Line{Value: []byte(msg), Delimiter: del}
   441  			}
   442  			sha := sha256.New()
   443  			sha.Write([]byte(m.LUCIProject))
   444  			sha.Write([]byte(desc.Prefix))
   445  			sha.Write([]byte(desc.Name))
   446  			streamIDHash := sha.Sum(nil)
   447  
   448  			Convey(`With nil LogEntry`, func() {
   449  				ts.addLogEntry(nil)
   450  				So(Archive(m), ShouldErrLike, "nil LogEntry")
   451  				So(clogger.entry, ShouldHaveLength, 0)
   452  			})
   453  
   454  			Convey(`With multiple LogEntry(s)`, func() {
   455  				ts.add(123, 456, 789)
   456  				ts.logs[0].GetText().Lines = []*logpb.Text_Line{
   457  					line("this", "\n"),
   458  					line("is", "\n"),
   459  				}
   460  				ts.logs[1].GetText().Lines = []*logpb.Text_Line{line("a complete", "\n")}
   461  				ts.logs[2].GetText().Lines = []*logpb.Text_Line{
   462  					line("line.", "\n"),
   463  				}
   464  
   465  				So(Archive(m), ShouldBeNil)
   466  				So(clogger.entry, ShouldHaveLength, 3)
   467  				So(clogger.entry[0].Payload, ShouldEqual, "this\nis")
   468  				So(clogger.entry[1].Payload, ShouldEqual, "a complete")
   469  				So(clogger.entry[2].Payload, ShouldEqual, "line.")
   470  
   471  				So(clogger.entry[0].Trace, ShouldEqual, fmt.Sprintf("%x", streamIDHash))
   472  				So(clogger.entry[1].Trace, ShouldEqual, fmt.Sprintf("%x", streamIDHash))
   473  				So(clogger.entry[2].Trace, ShouldEqual, fmt.Sprintf("%x", streamIDHash))
   474  			})
   475  
   476  			Convey("Skip, if sum(tags) is too big", func() {
   477  				ts.add(123)
   478  				str := make([]byte, maxTagSum/2+100)
   479  				for i := 0; i < len(str); i++ {
   480  					str[i] = "1234567890"[i%10]
   481  				}
   482  				desc.Tags = map[string]string{
   483  					string(str): string(str),
   484  				}
   485  				So(Archive(m), ShouldBeNil)
   486  				So(clogger.entry, ShouldHaveLength, 0)
   487  			})
   488  		})
   489  	})
   490  }