github.com/pachyderm/pachyderm@v1.13.4/src/server/pkg/storage/chunk/chunk_test.go (about) 1 package chunk 2 3 import ( 4 "bytes" 5 "context" 6 "fmt" 7 "math/rand" 8 "testing" 9 10 "github.com/chmduquesne/rollinghash/buzhash64" 11 units "github.com/docker/go-units" 12 "github.com/pachyderm/pachyderm/src/client/pkg/require" 13 "github.com/pachyderm/pachyderm/src/server/pkg/dbutil" 14 "github.com/pachyderm/pachyderm/src/server/pkg/obj" 15 "github.com/pachyderm/pachyderm/src/server/pkg/storage/track" 16 "github.com/pachyderm/pachyderm/src/server/pkg/testutil/random" 17 "github.com/pachyderm/pachyderm/src/server/pkg/uuid" 18 "modernc.org/mathutil" 19 ) 20 21 type test struct { 22 maxAnnotationSize int 23 n int 24 } 25 26 func (t test) name() string { 27 return fmt.Sprintf("Max Annotation Size: %v, Data Size: %v", units.HumanSize(float64(t.maxAnnotationSize)), units.HumanSize(float64(t.n))) 28 } 29 30 var tests = []test{ 31 test{1 * units.KB, 1 * units.KB}, 32 test{1 * units.KB, 1 * units.MB}, 33 test{1 * units.MB, 100 * units.MB}, 34 test{10 * units.MB, 100 * units.MB}, 35 } 36 37 func TestWriteThenRead(t *testing.T) { 38 _, chunks := newTestStorage(t) 39 msg := random.SeedRand() 40 for _, test := range tests { 41 t.Run(test.name(), func(t *testing.T) { 42 // Generate set of annotations. 43 as := generateAnnotations(test) 44 // Write then read the set of annotations. 45 writeAnnotations(t, chunks, as, msg) 46 readAnnotations(t, chunks, as, msg) 47 }) 48 } 49 } 50 51 func TestCopy(t *testing.T) { 52 _, chunks := newTestStorage(t) 53 msg := random.SeedRand() 54 for _, test := range tests { 55 t.Run(test.name(), func(t *testing.T) { 56 // Generate two sets of annotations. 57 as1 := generateAnnotations(test) 58 as2 := generateAnnotations(test) 59 // Write the two sets of annotations. 60 writeAnnotations(t, chunks, as1, msg) 61 writeAnnotations(t, chunks, as2, msg) 62 // Initial chunk count. 63 var initialChunkCount int64 64 require.NoError(t, chunks.List(context.Background(), func(_ string) error { 65 initialChunkCount++ 66 return nil 67 }), msg) 68 // Copy the annotations from the two sets of annotations. 69 as := append(as1, as2...) 70 cb := func(annotations []*Annotation) error { 71 for _, a := range annotations { 72 testA := a.Data.(*testAnnotation) 73 if a.NextDataRef != nil { 74 testA.dataRefs = append(testA.dataRefs, a.NextDataRef) 75 } 76 } 77 return nil 78 } 79 w := chunks.NewWriter(context.Background(), uuid.NewWithoutDashes(), cb) 80 copyAnnotations(t, chunks, w, as, msg) 81 require.NoError(t, w.Close(), msg) 82 // Check that the annotations were correctly copied. 83 readAnnotations(t, chunks, as, msg) 84 // Check that at least one chunk was copied when connecting the two sets of annotations. 85 var finalChunkCount int64 86 require.NoError(t, chunks.List(context.Background(), func(_ string) error { 87 finalChunkCount++ 88 return nil 89 }), msg) 90 require.True(t, finalChunkCount < initialChunkCount*2, msg) 91 }) 92 } 93 } 94 95 func BenchmarkWriter(b *testing.B) { 96 _, chunks := newTestStorage(b) 97 seq := RandSeq(100 * units.MB) 98 b.SetBytes(100 * units.MB) 99 b.ResetTimer() 100 for i := 0; i < b.N; i++ { 101 cb := func(_ []*Annotation) error { return nil } 102 w := chunks.NewWriter(context.Background(), uuid.NewWithoutDashes(), cb) 103 for i := 0; i < 100; i++ { 104 w.Annotate(&Annotation{}) 105 _, err := w.Write(seq[i*units.MB : (i+1)*units.MB]) 106 require.NoError(b, err) 107 } 108 require.NoError(b, w.Close()) 109 } 110 } 111 112 func BenchmarkRollingHash(b *testing.B) { 113 seq := RandSeq(100 * units.MB) 114 b.SetBytes(100 * units.MB) 115 hash := buzhash64.New() 116 splitMask := uint64((1 << uint64(23)) - 1) 117 b.ResetTimer() 118 for i := 0; i < b.N; i++ { 119 hash.Reset() 120 hash.Write(initialWindow) 121 for _, bt := range seq { 122 hash.Roll(bt) 123 //lint:ignore SA9003 benchmark is simulating exact usecase 124 if hash.Sum64()&splitMask == 0 { 125 } 126 } 127 } 128 } 129 130 type testAnnotation struct { 131 data []byte 132 dataRefs []*DataRef 133 } 134 135 func generateAnnotations(t test) []*testAnnotation { 136 var as []*testAnnotation 137 for t.n > 0 { 138 a := &testAnnotation{} 139 a.data = RandSeq(mathutil.Min(rand.Intn(t.maxAnnotationSize)+1, t.n)) 140 t.n -= len(a.data) 141 as = append(as, a) 142 } 143 return as 144 } 145 146 func writeAnnotations(t *testing.T, chunks *Storage, annotations []*testAnnotation, msg string) { 147 t.Run("Write", func(t *testing.T) { 148 cb := func(annotations []*Annotation) error { 149 for _, a := range annotations { 150 testA := a.Data.(*testAnnotation) 151 // TODO: Document why NextDataRef can be nil. 152 if a.NextDataRef != nil { 153 testA.dataRefs = append(testA.dataRefs, a.NextDataRef) 154 } 155 } 156 return nil 157 } 158 w := chunks.NewWriter(context.Background(), uuid.NewWithoutDashes(), cb) 159 for _, a := range annotations { 160 require.NoError(t, w.Annotate(&Annotation{ 161 Data: a, 162 })) 163 _, err := w.Write(a.data) 164 require.NoError(t, err, msg) 165 } 166 require.NoError(t, w.Close(), msg) 167 }) 168 } 169 170 func copyAnnotations(t *testing.T, chunks *Storage, w *Writer, annotations []*testAnnotation, msg string) { 171 t.Run("Copy", func(t *testing.T) { 172 for _, a := range annotations { 173 dataRefs := a.dataRefs 174 a.dataRefs = nil 175 require.NoError(t, w.Annotate(&Annotation{ 176 Data: a, 177 })) 178 for _, dataRef := range dataRefs { 179 require.NoError(t, w.Copy(dataRef)) 180 } 181 } 182 }) 183 } 184 185 func readAnnotations(t *testing.T, chunks *Storage, annotations []*testAnnotation, msg string) { 186 t.Run("Read", func(t *testing.T) { 187 for _, a := range annotations { 188 r := chunks.NewReader(context.Background(), a.dataRefs) 189 buf := &bytes.Buffer{} 190 require.NoError(t, r.Get(buf), msg) 191 require.Equal(t, 0, bytes.Compare(a.data, buf.Bytes()), msg) 192 } 193 }) 194 } 195 196 // newTestStorage is like NewTestStorage except it doesn't need an external tracker 197 // it is for testing this package, not for reuse. 198 func newTestStorage(t testing.TB) (obj.Client, *Storage) { 199 db := dbutil.NewTestDB(t) 200 tr := track.NewTestTracker(t, db) 201 return NewTestStorage(t, db, tr) 202 }