github.com/authzed/spicedb@v1.32.1-0.20240520085336-ebda56537386/internal/datastore/proxy/hedging.go (about) 1 package proxy 2 3 import ( 4 "context" 5 "fmt" 6 "sync" 7 "time" 8 9 "github.com/benbjohnson/clock" 10 "github.com/influxdata/tdigest" 11 "github.com/prometheus/client_golang/prometheus" 12 "github.com/prometheus/client_golang/prometheus/promauto" 13 14 log "github.com/authzed/spicedb/internal/logging" 15 "github.com/authzed/spicedb/pkg/datastore" 16 "github.com/authzed/spicedb/pkg/datastore/options" 17 core "github.com/authzed/spicedb/pkg/proto/core/v1" 18 ) 19 20 var hedgeableCount = promauto.NewCounter(prometheus.CounterOpts{ 21 Namespace: "spicedb", 22 Subsystem: "datastore", 23 Name: "hedgeable_requests_total", 24 Help: "total number of datastore requests which are eligible for hedging", 25 }) 26 27 var hedgedCount = promauto.NewCounter(prometheus.CounterOpts{ 28 Namespace: "spicedb", 29 Subsystem: "datastore", 30 Name: "hedged_requests_total", 31 Help: "total number of requests which have been hedged", 32 }) 33 34 const ( 35 minMaxRequestsThreshold = 1000 36 defaultTDigestCompression = float64(1000) 37 ) 38 39 type subrequest func(ctx context.Context, responseReady chan<- struct{}) 40 41 type hedger func(ctx context.Context, req subrequest) 42 43 func newHedger( 44 timeSource clock.Clock, 45 initialSlowRequestThreshold time.Duration, 46 maxSampleCount uint64, 47 quantile float64, 48 ) hedger { 49 var digestLock sync.Mutex 50 51 digests := []*tdigest.TDigest{ 52 tdigest.NewWithCompression(defaultTDigestCompression), 53 tdigest.NewWithCompression(defaultTDigestCompression), 54 } 55 56 // We pre-load the first digest with the initial slow request threshold at a weight 57 // such that we have reasonable data for our first request and so the other digest 58 // will be out of phase with this one, meaning when the first digest gets to 59 // maxSampleCount, the other digest will already be 50% warmed up. 60 digests[0].Add(initialSlowRequestThreshold.Seconds(), float64(maxSampleCount)/2) 61 62 return func(ctx context.Context, req subrequest) { 63 responseReady := make(chan struct{}, 1) 64 65 digestLock.Lock() 66 slowRequestThresholdSeconds := digests[0].Quantile(quantile) 67 digestLock.Unlock() 68 slowRequestThreshold := time.Duration(slowRequestThresholdSeconds * float64(time.Second)) 69 70 timer := timeSource.Timer(slowRequestThreshold) 71 originalStart := timeSource.Now() 72 73 ctx, cancel := context.WithCancel(ctx) 74 defer cancel() 75 hedgeableCount.Inc() 76 go req(ctx, responseReady) 77 78 var duration time.Duration 79 80 select { 81 case <-responseReady: 82 duration = timeSource.Since(originalStart) 83 case <-timer.C: 84 log.Ctx(ctx).Debug().Dur("after", slowRequestThreshold).Msg("sending hedged datastore request") 85 hedgedCount.Inc() 86 87 hedgedResponseReady := make(chan struct{}, 1) 88 hedgedStart := timeSource.Now() 89 go req(ctx, hedgedResponseReady) 90 91 select { 92 case <-responseReady: 93 duration = timeSource.Since(originalStart) 94 case <-hedgedResponseReady: 95 duration = timeSource.Since(hedgedStart) 96 } 97 } 98 99 digestLock.Lock() 100 defer digestLock.Unlock() 101 102 // Swap the current active digest if it has too many samples 103 if digests[0].Count() >= float64(maxSampleCount) { 104 log.Ctx(ctx).Trace().Float64("count", digests[0].Count()).Msg("switching to next hedging digest") 105 exhausted := digests[0] 106 digests = digests[1:] 107 exhausted.Reset() 108 digests = append(digests, exhausted) 109 } 110 111 // Record the duration to all candidate digests 112 log.Ctx(ctx).Trace().Dur("duration", duration).Msg("adding sample duration to statistics") 113 durSeconds := duration.Seconds() 114 for _, digest := range digests { 115 digest.Add(durSeconds, 1) 116 } 117 } 118 } 119 120 type hedgingProxy struct { 121 datastore.Datastore 122 123 revisionHedger hedger 124 headRevisionHedger hedger 125 readNamespaceHedger hedger 126 queryTuplesHedger hedger 127 } 128 129 // NewHedgingProxy creates a proxy which performs request hedging on read operations 130 // according to the specified config. 131 func NewHedgingProxy( 132 delegate datastore.Datastore, 133 initialSlowRequestThreshold time.Duration, 134 maxSampleCount uint64, 135 hedgingQuantile float64, 136 ) (datastore.Datastore, error) { 137 return newHedgingProxyWithTimeSource( 138 delegate, 139 initialSlowRequestThreshold, 140 maxSampleCount, 141 hedgingQuantile, 142 clock.New(), 143 ) 144 } 145 146 func newHedgingProxyWithTimeSource( 147 delegate datastore.Datastore, 148 initialSlowRequestThreshold time.Duration, 149 maxSampleCount uint64, 150 hedgingQuantile float64, 151 timeSource clock.Clock, 152 ) (datastore.Datastore, error) { 153 if initialSlowRequestThreshold < 0 { 154 return nil, fmt.Errorf("initial slow request threshold negative") 155 } 156 157 if maxSampleCount < minMaxRequestsThreshold { 158 return nil, fmt.Errorf(fmt.Sprintf("maxSampleCount must be >=%d", minMaxRequestsThreshold)) 159 } 160 161 if hedgingQuantile <= 0.0 || hedgingQuantile >= 1.0 { 162 return nil, fmt.Errorf("hedgingQuantile must be in the range (0.0-1.0) exclusive") 163 } 164 165 return hedgingProxy{ 166 delegate, 167 newHedger(timeSource, initialSlowRequestThreshold, maxSampleCount, hedgingQuantile), 168 newHedger(timeSource, initialSlowRequestThreshold, maxSampleCount, hedgingQuantile), 169 newHedger(timeSource, initialSlowRequestThreshold, maxSampleCount, hedgingQuantile), 170 newHedger(timeSource, initialSlowRequestThreshold, maxSampleCount, hedgingQuantile), 171 }, nil 172 } 173 174 func (hp hedgingProxy) Unwrap() datastore.Datastore { 175 return hp.Datastore 176 } 177 178 func (hp hedgingProxy) OptimizedRevision(ctx context.Context) (rev datastore.Revision, err error) { 179 var once sync.Once 180 subreq := func(ctx context.Context, responseReady chan<- struct{}) { 181 delegatedRev, delegatedErr := hp.Datastore.OptimizedRevision(ctx) 182 once.Do(func() { 183 rev = delegatedRev 184 err = delegatedErr 185 }) 186 responseReady <- struct{}{} 187 } 188 189 hp.revisionHedger(ctx, subreq) 190 191 return 192 } 193 194 func (hp hedgingProxy) HeadRevision(ctx context.Context) (rev datastore.Revision, err error) { 195 var once sync.Once 196 subreq := func(ctx context.Context, responseReady chan<- struct{}) { 197 delegatedRev, delegatedErr := hp.Datastore.HeadRevision(ctx) 198 once.Do(func() { 199 rev = delegatedRev 200 err = delegatedErr 201 }) 202 responseReady <- struct{}{} 203 } 204 205 hp.headRevisionHedger(ctx, subreq) 206 207 return 208 } 209 210 func (hp hedgingProxy) SnapshotReader(rev datastore.Revision) datastore.Reader { 211 delegate := hp.Datastore.SnapshotReader(rev) 212 return &hedgingReader{delegate, hp} 213 } 214 215 type hedgingReader struct { 216 datastore.Reader 217 218 p hedgingProxy 219 } 220 221 func (hp hedgingReader) ReadNamespaceByName( 222 ctx context.Context, 223 nsName string, 224 ) (ns *core.NamespaceDefinition, createdAt datastore.Revision, err error) { 225 var once sync.Once 226 subreq := func(ctx context.Context, responseReady chan<- struct{}) { 227 delegatedNs, delegatedRev, delegatedErr := hp.Reader.ReadNamespaceByName(ctx, nsName) 228 once.Do(func() { 229 ns = delegatedNs 230 createdAt = delegatedRev 231 err = delegatedErr 232 }) 233 responseReady <- struct{}{} 234 } 235 236 hp.p.readNamespaceHedger(ctx, subreq) 237 238 return 239 } 240 241 func (hp hedgingReader) QueryRelationships( 242 ctx context.Context, 243 filter datastore.RelationshipsFilter, 244 options ...options.QueryOptionsOption, 245 ) (iter datastore.RelationshipIterator, err error) { 246 return hp.executeQuery(ctx, func(c context.Context) (datastore.RelationshipIterator, error) { 247 return hp.Reader.QueryRelationships(ctx, filter, options...) 248 }) 249 } 250 251 func (hp hedgingReader) ReverseQueryRelationships( 252 ctx context.Context, 253 subjectsFilter datastore.SubjectsFilter, 254 opts ...options.ReverseQueryOptionsOption, 255 ) (iter datastore.RelationshipIterator, err error) { 256 return hp.executeQuery(ctx, func(c context.Context) (datastore.RelationshipIterator, error) { 257 return hp.Reader.ReverseQueryRelationships(ctx, subjectsFilter, opts...) 258 }) 259 } 260 261 func (hp hedgingReader) executeQuery( 262 ctx context.Context, 263 exec func(context.Context) (datastore.RelationshipIterator, error), 264 ) (delegateIterator datastore.RelationshipIterator, err error) { 265 var once sync.Once 266 subreq := func(ctx context.Context, responseReady chan<- struct{}) { 267 tempIterator, tempErr := exec(ctx) 268 resultsUsed := false 269 once.Do(func() { 270 delegateIterator = tempIterator 271 err = tempErr 272 resultsUsed = true 273 }) 274 // close the unused iterator 275 // only the first call to once.Do will run the function, so whichever 276 // hedged request is slower will have resultsUsed = false 277 if !resultsUsed && tempErr == nil { 278 tempIterator.Close() 279 } 280 responseReady <- struct{}{} 281 } 282 283 hp.p.queryTuplesHedger(ctx, subreq) 284 285 return 286 }