github.com/authzed/spicedb@v1.32.1-0.20240520085336-ebda56537386/internal/datastore/proxy/hedging.go (about)

     1  package proxy
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"sync"
     7  	"time"
     8  
     9  	"github.com/benbjohnson/clock"
    10  	"github.com/influxdata/tdigest"
    11  	"github.com/prometheus/client_golang/prometheus"
    12  	"github.com/prometheus/client_golang/prometheus/promauto"
    13  
    14  	log "github.com/authzed/spicedb/internal/logging"
    15  	"github.com/authzed/spicedb/pkg/datastore"
    16  	"github.com/authzed/spicedb/pkg/datastore/options"
    17  	core "github.com/authzed/spicedb/pkg/proto/core/v1"
    18  )
    19  
    20  var hedgeableCount = promauto.NewCounter(prometheus.CounterOpts{
    21  	Namespace: "spicedb",
    22  	Subsystem: "datastore",
    23  	Name:      "hedgeable_requests_total",
    24  	Help:      "total number of datastore requests which are eligible for hedging",
    25  })
    26  
    27  var hedgedCount = promauto.NewCounter(prometheus.CounterOpts{
    28  	Namespace: "spicedb",
    29  	Subsystem: "datastore",
    30  	Name:      "hedged_requests_total",
    31  	Help:      "total number of requests which have been hedged",
    32  })
    33  
    34  const (
    35  	minMaxRequestsThreshold   = 1000
    36  	defaultTDigestCompression = float64(1000)
    37  )
    38  
    39  type subrequest func(ctx context.Context, responseReady chan<- struct{})
    40  
    41  type hedger func(ctx context.Context, req subrequest)
    42  
    43  func newHedger(
    44  	timeSource clock.Clock,
    45  	initialSlowRequestThreshold time.Duration,
    46  	maxSampleCount uint64,
    47  	quantile float64,
    48  ) hedger {
    49  	var digestLock sync.Mutex
    50  
    51  	digests := []*tdigest.TDigest{
    52  		tdigest.NewWithCompression(defaultTDigestCompression),
    53  		tdigest.NewWithCompression(defaultTDigestCompression),
    54  	}
    55  
    56  	// We pre-load the first digest with the initial slow request threshold at a weight
    57  	// such that we have reasonable data for our first request and so the other digest
    58  	// will be out of phase with this one, meaning when the first digest gets to
    59  	// maxSampleCount, the other digest will already be 50% warmed up.
    60  	digests[0].Add(initialSlowRequestThreshold.Seconds(), float64(maxSampleCount)/2)
    61  
    62  	return func(ctx context.Context, req subrequest) {
    63  		responseReady := make(chan struct{}, 1)
    64  
    65  		digestLock.Lock()
    66  		slowRequestThresholdSeconds := digests[0].Quantile(quantile)
    67  		digestLock.Unlock()
    68  		slowRequestThreshold := time.Duration(slowRequestThresholdSeconds * float64(time.Second))
    69  
    70  		timer := timeSource.Timer(slowRequestThreshold)
    71  		originalStart := timeSource.Now()
    72  
    73  		ctx, cancel := context.WithCancel(ctx)
    74  		defer cancel()
    75  		hedgeableCount.Inc()
    76  		go req(ctx, responseReady)
    77  
    78  		var duration time.Duration
    79  
    80  		select {
    81  		case <-responseReady:
    82  			duration = timeSource.Since(originalStart)
    83  		case <-timer.C:
    84  			log.Ctx(ctx).Debug().Dur("after", slowRequestThreshold).Msg("sending hedged datastore request")
    85  			hedgedCount.Inc()
    86  
    87  			hedgedResponseReady := make(chan struct{}, 1)
    88  			hedgedStart := timeSource.Now()
    89  			go req(ctx, hedgedResponseReady)
    90  
    91  			select {
    92  			case <-responseReady:
    93  				duration = timeSource.Since(originalStart)
    94  			case <-hedgedResponseReady:
    95  				duration = timeSource.Since(hedgedStart)
    96  			}
    97  		}
    98  
    99  		digestLock.Lock()
   100  		defer digestLock.Unlock()
   101  
   102  		// Swap the current active digest if it has too many samples
   103  		if digests[0].Count() >= float64(maxSampleCount) {
   104  			log.Ctx(ctx).Trace().Float64("count", digests[0].Count()).Msg("switching to next hedging digest")
   105  			exhausted := digests[0]
   106  			digests = digests[1:]
   107  			exhausted.Reset()
   108  			digests = append(digests, exhausted)
   109  		}
   110  
   111  		// Record the duration to all candidate digests
   112  		log.Ctx(ctx).Trace().Dur("duration", duration).Msg("adding sample duration to statistics")
   113  		durSeconds := duration.Seconds()
   114  		for _, digest := range digests {
   115  			digest.Add(durSeconds, 1)
   116  		}
   117  	}
   118  }
   119  
// hedgingProxy wraps a datastore.Datastore and routes selected read
// operations through hedgers. Methods not overridden in this file are served
// by the embedded Datastore.
type hedgingProxy struct {
	datastore.Datastore

	// One hedger per operation kind, so each keeps its own latency statistics.
	revisionHedger      hedger // used by OptimizedRevision
	headRevisionHedger  hedger // used by HeadRevision
	readNamespaceHedger hedger // used by hedgingReader.ReadNamespaceByName
	queryTuplesHedger   hedger // used by hedgingReader.executeQuery
}
   128  
   129  // NewHedgingProxy creates a proxy which performs request hedging on read operations
   130  // according to the specified config.
   131  func NewHedgingProxy(
   132  	delegate datastore.Datastore,
   133  	initialSlowRequestThreshold time.Duration,
   134  	maxSampleCount uint64,
   135  	hedgingQuantile float64,
   136  ) (datastore.Datastore, error) {
   137  	return newHedgingProxyWithTimeSource(
   138  		delegate,
   139  		initialSlowRequestThreshold,
   140  		maxSampleCount,
   141  		hedgingQuantile,
   142  		clock.New(),
   143  	)
   144  }
   145  
   146  func newHedgingProxyWithTimeSource(
   147  	delegate datastore.Datastore,
   148  	initialSlowRequestThreshold time.Duration,
   149  	maxSampleCount uint64,
   150  	hedgingQuantile float64,
   151  	timeSource clock.Clock,
   152  ) (datastore.Datastore, error) {
   153  	if initialSlowRequestThreshold < 0 {
   154  		return nil, fmt.Errorf("initial slow request threshold negative")
   155  	}
   156  
   157  	if maxSampleCount < minMaxRequestsThreshold {
   158  		return nil, fmt.Errorf(fmt.Sprintf("maxSampleCount must be >=%d", minMaxRequestsThreshold))
   159  	}
   160  
   161  	if hedgingQuantile <= 0.0 || hedgingQuantile >= 1.0 {
   162  		return nil, fmt.Errorf("hedgingQuantile must be in the range (0.0-1.0) exclusive")
   163  	}
   164  
   165  	return hedgingProxy{
   166  		delegate,
   167  		newHedger(timeSource, initialSlowRequestThreshold, maxSampleCount, hedgingQuantile),
   168  		newHedger(timeSource, initialSlowRequestThreshold, maxSampleCount, hedgingQuantile),
   169  		newHedger(timeSource, initialSlowRequestThreshold, maxSampleCount, hedgingQuantile),
   170  		newHedger(timeSource, initialSlowRequestThreshold, maxSampleCount, hedgingQuantile),
   171  	}, nil
   172  }
   173  
// Unwrap returns the datastore wrapped by this proxy.
func (hp hedgingProxy) Unwrap() datastore.Datastore {
	return hp.Datastore
}
   177  
   178  func (hp hedgingProxy) OptimizedRevision(ctx context.Context) (rev datastore.Revision, err error) {
   179  	var once sync.Once
   180  	subreq := func(ctx context.Context, responseReady chan<- struct{}) {
   181  		delegatedRev, delegatedErr := hp.Datastore.OptimizedRevision(ctx)
   182  		once.Do(func() {
   183  			rev = delegatedRev
   184  			err = delegatedErr
   185  		})
   186  		responseReady <- struct{}{}
   187  	}
   188  
   189  	hp.revisionHedger(ctx, subreq)
   190  
   191  	return
   192  }
   193  
   194  func (hp hedgingProxy) HeadRevision(ctx context.Context) (rev datastore.Revision, err error) {
   195  	var once sync.Once
   196  	subreq := func(ctx context.Context, responseReady chan<- struct{}) {
   197  		delegatedRev, delegatedErr := hp.Datastore.HeadRevision(ctx)
   198  		once.Do(func() {
   199  			rev = delegatedRev
   200  			err = delegatedErr
   201  		})
   202  		responseReady <- struct{}{}
   203  	}
   204  
   205  	hp.headRevisionHedger(ctx, subreq)
   206  
   207  	return
   208  }
   209  
   210  func (hp hedgingProxy) SnapshotReader(rev datastore.Revision) datastore.Reader {
   211  	delegate := hp.Datastore.SnapshotReader(rev)
   212  	return &hedgingReader{delegate, hp}
   213  }
   214  
// hedgingReader wraps a datastore.Reader and hedges namespace reads and
// relationship queries. Methods not overridden in this file are served by the
// embedded Reader.
type hedgingReader struct {
	datastore.Reader

	// p is the proxy that created this reader; it supplies the hedgers.
	p hedgingProxy
}
   220  
   221  func (hp hedgingReader) ReadNamespaceByName(
   222  	ctx context.Context,
   223  	nsName string,
   224  ) (ns *core.NamespaceDefinition, createdAt datastore.Revision, err error) {
   225  	var once sync.Once
   226  	subreq := func(ctx context.Context, responseReady chan<- struct{}) {
   227  		delegatedNs, delegatedRev, delegatedErr := hp.Reader.ReadNamespaceByName(ctx, nsName)
   228  		once.Do(func() {
   229  			ns = delegatedNs
   230  			createdAt = delegatedRev
   231  			err = delegatedErr
   232  		})
   233  		responseReady <- struct{}{}
   234  	}
   235  
   236  	hp.p.readNamespaceHedger(ctx, subreq)
   237  
   238  	return
   239  }
   240  
   241  func (hp hedgingReader) QueryRelationships(
   242  	ctx context.Context,
   243  	filter datastore.RelationshipsFilter,
   244  	options ...options.QueryOptionsOption,
   245  ) (iter datastore.RelationshipIterator, err error) {
   246  	return hp.executeQuery(ctx, func(c context.Context) (datastore.RelationshipIterator, error) {
   247  		return hp.Reader.QueryRelationships(ctx, filter, options...)
   248  	})
   249  }
   250  
   251  func (hp hedgingReader) ReverseQueryRelationships(
   252  	ctx context.Context,
   253  	subjectsFilter datastore.SubjectsFilter,
   254  	opts ...options.ReverseQueryOptionsOption,
   255  ) (iter datastore.RelationshipIterator, err error) {
   256  	return hp.executeQuery(ctx, func(c context.Context) (datastore.RelationshipIterator, error) {
   257  		return hp.Reader.ReverseQueryRelationships(ctx, subjectsFilter, opts...)
   258  	})
   259  }
   260  
   261  func (hp hedgingReader) executeQuery(
   262  	ctx context.Context,
   263  	exec func(context.Context) (datastore.RelationshipIterator, error),
   264  ) (delegateIterator datastore.RelationshipIterator, err error) {
   265  	var once sync.Once
   266  	subreq := func(ctx context.Context, responseReady chan<- struct{}) {
   267  		tempIterator, tempErr := exec(ctx)
   268  		resultsUsed := false
   269  		once.Do(func() {
   270  			delegateIterator = tempIterator
   271  			err = tempErr
   272  			resultsUsed = true
   273  		})
   274  		// close the unused iterator
   275  		// only the first call to once.Do will run the function, so whichever
   276  		// hedged request is slower will have resultsUsed = false
   277  		if !resultsUsed && tempErr == nil {
   278  			tempIterator.Close()
   279  		}
   280  		responseReady <- struct{}{}
   281  	}
   282  
   283  	hp.p.queryTuplesHedger(ctx, subreq)
   284  
   285  	return
   286  }