github.com/pachyderm/pachyderm@v1.13.4/src/server/pkg/storage/chunk/chunk_test.go (about)

     1  package chunk
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"fmt"
     7  	"math/rand"
     8  	"testing"
     9  
    10  	"github.com/chmduquesne/rollinghash/buzhash64"
    11  	units "github.com/docker/go-units"
    12  	"github.com/pachyderm/pachyderm/src/client/pkg/require"
    13  	"github.com/pachyderm/pachyderm/src/server/pkg/dbutil"
    14  	"github.com/pachyderm/pachyderm/src/server/pkg/obj"
    15  	"github.com/pachyderm/pachyderm/src/server/pkg/storage/track"
    16  	"github.com/pachyderm/pachyderm/src/server/pkg/testutil/random"
    17  	"github.com/pachyderm/pachyderm/src/server/pkg/uuid"
    18  	"modernc.org/mathutil"
    19  )
    20  
    21  type test struct {
    22  	maxAnnotationSize int
    23  	n                 int
    24  }
    25  
    26  func (t test) name() string {
    27  	return fmt.Sprintf("Max Annotation Size: %v, Data Size: %v", units.HumanSize(float64(t.maxAnnotationSize)), units.HumanSize(float64(t.n)))
    28  }
    29  
    30  var tests = []test{
    31  	test{1 * units.KB, 1 * units.KB},
    32  	test{1 * units.KB, 1 * units.MB},
    33  	test{1 * units.MB, 100 * units.MB},
    34  	test{10 * units.MB, 100 * units.MB},
    35  }
    36  
    37  func TestWriteThenRead(t *testing.T) {
    38  	_, chunks := newTestStorage(t)
    39  	msg := random.SeedRand()
    40  	for _, test := range tests {
    41  		t.Run(test.name(), func(t *testing.T) {
    42  			// Generate set of annotations.
    43  			as := generateAnnotations(test)
    44  			// Write then read the set of annotations.
    45  			writeAnnotations(t, chunks, as, msg)
    46  			readAnnotations(t, chunks, as, msg)
    47  		})
    48  	}
    49  }
    50  
    51  func TestCopy(t *testing.T) {
    52  	_, chunks := newTestStorage(t)
    53  	msg := random.SeedRand()
    54  	for _, test := range tests {
    55  		t.Run(test.name(), func(t *testing.T) {
    56  			// Generate two sets of annotations.
    57  			as1 := generateAnnotations(test)
    58  			as2 := generateAnnotations(test)
    59  			// Write the two sets of annotations.
    60  			writeAnnotations(t, chunks, as1, msg)
    61  			writeAnnotations(t, chunks, as2, msg)
    62  			// Initial chunk count.
    63  			var initialChunkCount int64
    64  			require.NoError(t, chunks.List(context.Background(), func(_ string) error {
    65  				initialChunkCount++
    66  				return nil
    67  			}), msg)
    68  			// Copy the annotations from the two sets of annotations.
    69  			as := append(as1, as2...)
    70  			cb := func(annotations []*Annotation) error {
    71  				for _, a := range annotations {
    72  					testA := a.Data.(*testAnnotation)
    73  					if a.NextDataRef != nil {
    74  						testA.dataRefs = append(testA.dataRefs, a.NextDataRef)
    75  					}
    76  				}
    77  				return nil
    78  			}
    79  			w := chunks.NewWriter(context.Background(), uuid.NewWithoutDashes(), cb)
    80  			copyAnnotations(t, chunks, w, as, msg)
    81  			require.NoError(t, w.Close(), msg)
    82  			// Check that the annotations were correctly copied.
    83  			readAnnotations(t, chunks, as, msg)
    84  			// Check that at least one chunk was copied when connecting the two sets of annotations.
    85  			var finalChunkCount int64
    86  			require.NoError(t, chunks.List(context.Background(), func(_ string) error {
    87  				finalChunkCount++
    88  				return nil
    89  			}), msg)
    90  			require.True(t, finalChunkCount < initialChunkCount*2, msg)
    91  		})
    92  	}
    93  }
    94  
    95  func BenchmarkWriter(b *testing.B) {
    96  	_, chunks := newTestStorage(b)
    97  	seq := RandSeq(100 * units.MB)
    98  	b.SetBytes(100 * units.MB)
    99  	b.ResetTimer()
   100  	for i := 0; i < b.N; i++ {
   101  		cb := func(_ []*Annotation) error { return nil }
   102  		w := chunks.NewWriter(context.Background(), uuid.NewWithoutDashes(), cb)
   103  		for i := 0; i < 100; i++ {
   104  			w.Annotate(&Annotation{})
   105  			_, err := w.Write(seq[i*units.MB : (i+1)*units.MB])
   106  			require.NoError(b, err)
   107  		}
   108  		require.NoError(b, w.Close())
   109  	}
   110  }
   111  
   112  func BenchmarkRollingHash(b *testing.B) {
   113  	seq := RandSeq(100 * units.MB)
   114  	b.SetBytes(100 * units.MB)
   115  	hash := buzhash64.New()
   116  	splitMask := uint64((1 << uint64(23)) - 1)
   117  	b.ResetTimer()
   118  	for i := 0; i < b.N; i++ {
   119  		hash.Reset()
   120  		hash.Write(initialWindow)
   121  		for _, bt := range seq {
   122  			hash.Roll(bt)
   123  			//lint:ignore SA9003 benchmark is simulating exact usecase
   124  			if hash.Sum64()&splitMask == 0 {
   125  			}
   126  		}
   127  	}
   128  }
   129  
// testAnnotation is the value the tests attach to each Annotation's Data
// field. It pairs the bytes written for an annotation with the data
// references the writer callbacks record for it.
type testAnnotation struct {
	data     []byte     // bytes written for this annotation and expected on read-back
	dataRefs []*DataRef // appended to by the writer callbacks whenever NextDataRef is non-nil
}
   134  
   135  func generateAnnotations(t test) []*testAnnotation {
   136  	var as []*testAnnotation
   137  	for t.n > 0 {
   138  		a := &testAnnotation{}
   139  		a.data = RandSeq(mathutil.Min(rand.Intn(t.maxAnnotationSize)+1, t.n))
   140  		t.n -= len(a.data)
   141  		as = append(as, a)
   142  	}
   143  	return as
   144  }
   145  
   146  func writeAnnotations(t *testing.T, chunks *Storage, annotations []*testAnnotation, msg string) {
   147  	t.Run("Write", func(t *testing.T) {
   148  		cb := func(annotations []*Annotation) error {
   149  			for _, a := range annotations {
   150  				testA := a.Data.(*testAnnotation)
   151  				// TODO: Document why NextDataRef can be nil.
   152  				if a.NextDataRef != nil {
   153  					testA.dataRefs = append(testA.dataRefs, a.NextDataRef)
   154  				}
   155  			}
   156  			return nil
   157  		}
   158  		w := chunks.NewWriter(context.Background(), uuid.NewWithoutDashes(), cb)
   159  		for _, a := range annotations {
   160  			require.NoError(t, w.Annotate(&Annotation{
   161  				Data: a,
   162  			}))
   163  			_, err := w.Write(a.data)
   164  			require.NoError(t, err, msg)
   165  		}
   166  		require.NoError(t, w.Close(), msg)
   167  	})
   168  }
   169  
   170  func copyAnnotations(t *testing.T, chunks *Storage, w *Writer, annotations []*testAnnotation, msg string) {
   171  	t.Run("Copy", func(t *testing.T) {
   172  		for _, a := range annotations {
   173  			dataRefs := a.dataRefs
   174  			a.dataRefs = nil
   175  			require.NoError(t, w.Annotate(&Annotation{
   176  				Data: a,
   177  			}))
   178  			for _, dataRef := range dataRefs {
   179  				require.NoError(t, w.Copy(dataRef))
   180  			}
   181  		}
   182  	})
   183  }
   184  
   185  func readAnnotations(t *testing.T, chunks *Storage, annotations []*testAnnotation, msg string) {
   186  	t.Run("Read", func(t *testing.T) {
   187  		for _, a := range annotations {
   188  			r := chunks.NewReader(context.Background(), a.dataRefs)
   189  			buf := &bytes.Buffer{}
   190  			require.NoError(t, r.Get(buf), msg)
   191  			require.Equal(t, 0, bytes.Compare(a.data, buf.Bytes()), msg)
   192  		}
   193  	})
   194  }
   195  
   196  // newTestStorage is like NewTestStorage except it doesn't need an external tracker
   197  // it is for testing this package, not for reuse.
   198  func newTestStorage(t testing.TB) (obj.Client, *Storage) {
   199  	db := dbutil.NewTestDB(t)
   200  	tr := track.NewTestTracker(t, db)
   201  	return NewTestStorage(t, db, tr)
   202  }