github.com/authzed/spicedb@v1.32.1-0.20240520085336-ebda56537386/internal/datastore/common/gc_test.go

package common

import (
	"context"
	"fmt"
	"slices"
	"sync"
	"testing"
	"time"

	"github.com/authzed/spicedb/internal/datastore/revisions"
	"github.com/authzed/spicedb/pkg/datastore"

	"github.com/prometheus/client_golang/prometheus"
	promclient "github.com/prometheus/client_model/go"
	"github.com/stretchr/testify/require"
)

// fakeGC is a fake garbage collector that returns a new, incremented revision
// each time TxIDBefore is called.
type fakeGC struct {
	lastRevision uint64
	deleter      gcDeleter
	metrics      gcMetrics
	lock         sync.RWMutex
}

// gcMetrics tracks how many times each garbage collection hook was invoked.
type gcMetrics struct {
	deleteBeforeTxCount   int
	markedCompleteCount   int
	resetGCCompletedCount int
}

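// newFakeGC constructs a fakeGC that delegates deletions to the given deleter.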
func newFakeGC(deleter gcDeleter) fakeGC {
	return fakeGC{
		lastRevision: 0,
		deleter:      deleter,
	}
}

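// ReadyState always reports the fake datastore as ready.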
func (*fakeGC) ReadyState(_ context.Context) (datastore.ReadyState, error) {
	return datastore.ReadyState{
		Message: "Ready",
		IsReady: true,
	}, nil
}

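// Now returns the current wall-clock time.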
func (*fakeGC) Now(_ context.Context) (time.Time, error) {
	return time.Now(), nil
}

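// TxIDBefore increments and returns the fake transaction ID as a revision on
// every call, ignoring the provided time.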
func (gc *fakeGC) TxIDBefore(_ context.Context, _ time.Time) (datastore.Revision, error) {
	gc.lock.Lock()
	defer gc.lock.Unlock()

	gc.lastRevision++

	rev := revisions.NewForTransactionID(gc.lastRevision)

	return rev, nil
}

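// DeleteBeforeTx records the call and delegates the deletion to the configured
// deleter.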
func (gc *fakeGC) DeleteBeforeTx(_ context.Context, rev datastore.Revision) (DeletionCounts, error) {
	gc.lock.Lock()
	defer gc.lock.Unlock()

	gc.metrics.deleteBeforeTxCount++

	revInt := rev.(revisions.TransactionIDRevision).TransactionID()

	return gc.deleter.DeleteBeforeTx(int64(revInt))
}

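// HasGCRun reports whether at least one GC run has been marked complete.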
func (gc *fakeGC) HasGCRun() bool {
	gc.lock.RLock()
	defer gc.lock.RUnlock()

	return gc.metrics.markedCompleteCount > 0
}

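// MarkGCCompleted records that a garbage collection run completed.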
func (gc *fakeGC) MarkGCCompleted() {
	gc.lock.Lock()
	defer gc.lock.Unlock()

	gc.metrics.markedCompleteCount++
}

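// ResetGCCompleted records that the GC-completed state was reset.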
func (gc *fakeGC) ResetGCCompleted() {
	gc.lock.Lock()
	defer gc.lock.Unlock()

	gc.metrics.resetGCCompletedCount++
}

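// GetMetrics returns a snapshot of the recorded call counts.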
func (gc *fakeGC) GetMetrics() gcMetrics {
	gc.lock.RLock()
	defer gc.lock.RUnlock()

	return gc.metrics
}

// gcDeleter allows tests to specify different deletion behaviors.
type gcDeleter interface {
	DeleteBeforeTx(revision int64) (DeletionCounts, error)
}

// alwaysErrorDeleter always fails when attempting a delete.
type alwaysErrorDeleter struct{}

func (alwaysErrorDeleter) DeleteBeforeTx(_ int64) (DeletionCounts, error) {
	return DeletionCounts{}, fmt.Errorf("delete error")
}

// revisionErrorDeleter fails only on the specified revisions.
type revisionErrorDeleter struct {
	errorOnRevisions []int64
}

func (d revisionErrorDeleter) DeleteBeforeTx(revision int64) (DeletionCounts, error) {
	if slices.Contains(d.errorOnRevisions, revision) {
		return DeletionCounts{}, fmt.Errorf("delete error")
	}

	return DeletionCounts{}, nil
}

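// The following is an illustrative sketch only and is not used by the tests in
// this file: it shows how gcDeleter can model recovery behavior by failing a
// fixed number of times before succeeding. The name failNTimesDeleter is
// hypothetical.
type failNTimesDeleter struct {
	remainingFailures int
}

func (d *failNTimesDeleter) DeleteBeforeTx(_ int64) (DeletionCounts, error) {
	if d.remainingFailures > 0 {
		d.remainingFailures--
		return DeletionCounts{}, fmt.Errorf("delete error")
	}

	return DeletionCounts{}, nil
}
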
// TestGCFailureBackoff verifies how the MaxElapsedTime setting affects the
// exponential backoff applied when garbage collection repeatedly fails.
func TestGCFailureBackoff(t *testing.T) {
	localCounter := prometheus.NewCounter(gcFailureCounterConfig)
	reg := prometheus.NewRegistry()
	require.NoError(t, reg.Register(localCounter))

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	go func() {
		gc := newFakeGC(alwaysErrorDeleter{})
		require.Error(t, startGarbageCollectorWithMaxElapsedTime(ctx, &gc, 100*time.Millisecond, 1*time.Second, 1*time.Nanosecond, 1*time.Minute, localCounter))
	}()
	time.Sleep(200 * time.Millisecond)
	cancel()

	metrics, err := reg.Gather()
	require.NoError(t, err)
	var mf *promclient.MetricFamily
	for _, metric := range metrics {
		if metric.GetName() == "spicedb_datastore_gc_failure_total" {
			mf = metric
		}
	}
	require.Greater(t, *(mf.GetMetric()[0].Counter.Value), 100.0, "MaxElapsedTime=1ns did not cause the backoff to be ignored")

	localCounter = prometheus.NewCounter(gcFailureCounterConfig)
	reg = prometheus.NewRegistry()
	require.NoError(t, reg.Register(localCounter))
	ctx, cancel = context.WithCancel(context.Background())
	defer cancel()
	go func() {
		gc := newFakeGC(alwaysErrorDeleter{})
		require.Error(t, startGarbageCollectorWithMaxElapsedTime(ctx, &gc, 100*time.Millisecond, 0, 1*time.Second, 1*time.Minute, localCounter))
	}()
	time.Sleep(200 * time.Millisecond)
	cancel()

	metrics, err = reg.Gather()
	require.NoError(t, err)
	for _, metric := range metrics {
		if metric.GetName() == "spicedb_datastore_gc_failure_total" {
			mf = metric
		}
	}
	require.Less(t, *(mf.GetMetric()[0].Counter.Value), 3.0, "MaxElapsedTime=0 should not have caused the backoff to be ignored")
}

// TestGCFailureBackoffReset ensures the garbage collector interval is reset
// after recovering from an error. The garbage collector should not continue to
// use the exponential backoff interval that is activated on error.
func TestGCFailureBackoffReset(t *testing.T) {
	gc := newFakeGC(revisionErrorDeleter{
		// Error on revisions 1 - 5, giving the exponential
		// backoff enough time to fail the test if the interval
		// is not reset properly.
		errorOnRevisions: []int64{1, 2, 3, 4, 5},
	})

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	go func() {
		interval := 10 * time.Millisecond
		window := 10 * time.Second
		timeout := 1 * time.Minute

		require.Error(t, StartGarbageCollector(ctx, &gc, interval, window, timeout))
	}()

	time.Sleep(500 * time.Millisecond)
	cancel()

	// The next interval should have been reset after recovering from the error.
	// If it is not reset, the lingering exponential backoff interval will not
	// give the GC enough time to complete the expected number of runs.
	require.Greater(t, gc.GetMetrics().markedCompleteCount, 20, "next interval was not reset after recovering from the error")
}