github.com/authzed/spicedb@v1.32.1-0.20240520085336-ebda56537386/internal/datastore/common/gc_test.go

package common

import (
	"context"
	"fmt"
	"slices"
	"sync"
	"testing"
	"time"

	"github.com/authzed/spicedb/internal/datastore/revisions"
	"github.com/authzed/spicedb/pkg/datastore"

	"github.com/prometheus/client_golang/prometheus"
	promclient "github.com/prometheus/client_model/go"
	"github.com/stretchr/testify/require"
)

// fakeGC is a fake garbage collector that returns a new, incremented revision
// each time TxIDBefore is called.
type fakeGC struct {
	lastRevision uint64
	deleter      gcDeleter
	metrics      gcMetrics
	lock         sync.RWMutex
}

type gcMetrics struct {
	deleteBeforeTxCount   int
	markedCompleteCount   int
	resetGCCompletedCount int
}

func newFakeGC(deleter gcDeleter) fakeGC {
	return fakeGC{
		lastRevision: 0,
		deleter:      deleter,
	}
}

func (*fakeGC) ReadyState(_ context.Context) (datastore.ReadyState, error) {
	return datastore.ReadyState{
		Message: "Ready",
		IsReady: true,
	}, nil
}

func (*fakeGC) Now(_ context.Context) (time.Time, error) {
	return time.Now(), nil
}

func (gc *fakeGC) TxIDBefore(_ context.Context, _ time.Time) (datastore.Revision, error) {
	gc.lock.Lock()
	defer gc.lock.Unlock()

	gc.lastRevision++

	rev := revisions.NewForTransactionID(gc.lastRevision)

	return rev, nil
}

func (gc *fakeGC) DeleteBeforeTx(_ context.Context, rev datastore.Revision) (DeletionCounts, error) {
	gc.lock.Lock()
	defer gc.lock.Unlock()

	gc.metrics.deleteBeforeTxCount++

	revInt := rev.(revisions.TransactionIDRevision).TransactionID()

	return gc.deleter.DeleteBeforeTx(int64(revInt))
}

func (gc *fakeGC) HasGCRun() bool {
	gc.lock.Lock()
	defer gc.lock.Unlock()

	return gc.metrics.markedCompleteCount > 0
}

func (gc *fakeGC) MarkGCCompleted() {
	gc.lock.Lock()
	defer gc.lock.Unlock()

	gc.metrics.markedCompleteCount++
}

func (gc *fakeGC) ResetGCCompleted() {
	gc.lock.Lock()
	defer gc.lock.Unlock()

	gc.metrics.resetGCCompletedCount++
}

func (gc *fakeGC) GetMetrics() gcMetrics {
	gc.lock.Lock()
	defer gc.lock.Unlock()

	return gc.metrics
}

// gcDeleter allows specifying different deletion behaviors for tests.
type gcDeleter interface {
	DeleteBeforeTx(revision int64) (DeletionCounts, error)
}

// alwaysErrorDeleter always errors when trying to perform a delete.
type alwaysErrorDeleter struct{}

func (alwaysErrorDeleter) DeleteBeforeTx(_ int64) (DeletionCounts, error) {
	return DeletionCounts{}, fmt.Errorf("delete error")
}

// revisionErrorDeleter errors only on specific revisions.
type revisionErrorDeleter struct {
	errorOnRevisions []int64
}

func (d revisionErrorDeleter) DeleteBeforeTx(revision int64) (DeletionCounts, error) {
	if slices.Contains(d.errorOnRevisions, revision) {
		return DeletionCounts{}, fmt.Errorf("delete error")
	}

	return DeletionCounts{}, nil
}

func TestGCFailureBackoff(t *testing.T) {
	localCounter := prometheus.NewCounter(gcFailureCounterConfig)
	reg := prometheus.NewRegistry()
	require.NoError(t, reg.Register(localCounter))

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	go func() {
		gc := newFakeGC(alwaysErrorDeleter{})
		require.Error(t, startGarbageCollectorWithMaxElapsedTime(ctx, &gc, 100*time.Millisecond, 1*time.Second, 1*time.Nanosecond, 1*time.Minute, localCounter))
	}()
	time.Sleep(200 * time.Millisecond)
	cancel()

	metrics, err := reg.Gather()
	require.NoError(t, err)
	var mf *promclient.MetricFamily
	for _, metric := range metrics {
		if metric.GetName() == "spicedb_datastore_gc_failure_total" {
			mf = metric
		}
	}
	require.Greater(t, *(mf.GetMetric()[0].Counter.Value), 100.0, "MaxElapsedTime=1ns did not cause backoff to get ignored")

	localCounter = prometheus.NewCounter(gcFailureCounterConfig)
	reg = prometheus.NewRegistry()
	require.NoError(t, reg.Register(localCounter))
	ctx, cancel = context.WithCancel(context.Background())
	defer cancel()
	go func() {
		gc := newFakeGC(alwaysErrorDeleter{})
		require.Error(t, startGarbageCollectorWithMaxElapsedTime(ctx, &gc, 100*time.Millisecond, 0, 1*time.Second, 1*time.Minute, localCounter))
	}()
	time.Sleep(200 * time.Millisecond)
	cancel()

	metrics, err = reg.Gather()
	require.NoError(t, err)
	for _, metric := range metrics {
		if metric.GetName() == "spicedb_datastore_gc_failure_total" {
			mf = metric
		}
	}
	require.Less(t, *(mf.GetMetric()[0].Counter.Value), 3.0, "MaxElapsedTime=0 should not have caused backoff to get ignored")
}

// TestGCFailureBackoffReset ensures the garbage collector interval is reset
// after recovering from an error. The garbage collector should not continue
// to use the exponential backoff interval that is activated on error.
func TestGCFailureBackoffReset(t *testing.T) {
	gc := newFakeGC(revisionErrorDeleter{
		// Error on revisions 1 - 5, giving the exponential
		// backoff enough time to fail the test if the interval
		// is not reset properly.
		errorOnRevisions: []int64{1, 2, 3, 4, 5},
	})

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	go func() {
		interval := 10 * time.Millisecond
		window := 10 * time.Second
		timeout := 1 * time.Minute

		require.Error(t, StartGarbageCollector(ctx, &gc, interval, window, timeout))
	}()

	time.Sleep(500 * time.Millisecond)
	cancel()

	// The next interval should have been reset after recovering from the error.
	// If it is not reset, the last exponential backoff interval will not give
	// the GC enough time to run.
	require.Greater(t, gc.GetMetrics().markedCompleteCount, 20, "Next interval was not reset with backoff")
}
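
// The sketch below is illustrative and not part of the original file. It shows
// how the gcDeleter seam can express another deletion behavior for tests: a
// hypothetical deleter that records every revision it is asked to delete, so a
// test could assert exactly which revisions the garbage collector swept. The
// name recordingDeleter and its fields are assumptions; only the gcDeleter
// interface and DeletionCounts come from the file above.
type recordingDeleter struct {
	mu        sync.Mutex
	revisions []int64
}

func (d *recordingDeleter) DeleteBeforeTx(revision int64) (DeletionCounts, error) {
	d.mu.Lock()
	defer d.mu.Unlock()

	// Record the revision and report a successful (empty) deletion.
	d.revisions = append(d.revisions, revision)
	return DeletionCounts{}, nil
}

// A test would pass a pointer so the recorded state is shared:
// deleter := &recordingDeleter{}; gc := newFakeGC(deleter).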