go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/logdog/server/collector/coordinator/cache.go (about)

     1  // Copyright 2016 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package coordinator
    16  
    17  import (
    18  	"context"
    19  	"sync"
    20  	"time"
    21  
    22  	"go.chromium.org/luci/common/data/caching/lru"
    23  	log "go.chromium.org/luci/common/logging"
    24  	"go.chromium.org/luci/common/retry/transient"
    25  	"go.chromium.org/luci/common/sync/promise"
    26  	"go.chromium.org/luci/common/tsmon/field"
    27  	"go.chromium.org/luci/common/tsmon/metric"
    28  	"go.chromium.org/luci/logdog/common/types"
    29  )
    30  
    31  const (
    32  	// DefaultSize is the default (maximum) size of the LRU cache.
    33  	DefaultSize = 1024 * 1024
    34  
    35  	// DefaultExpiration is the default expiration value.
    36  	DefaultExpiration = 10 * time.Minute
    37  )
    38  
    39  var (
    40  	tsCache = metric.NewCounter("logdog/collector/coordinator/cache",
    41  		"Metrics for cache uses, tracking hits and misses.",
    42  		nil,
    43  		field.Bool("hit"))
    44  )
    45  
    46  // cache is a Coordinator interface implementation for the Collector service
    47  // that caches remote results locally.
    48  type cache struct {
    49  	Coordinator
    50  
    51  	// expiration is the maximum lifespan of a cache entry. If an entry is older
    52  	// than this, it will be discarded.
    53  	expiration time.Duration
    54  
    55  	// cache is the LRU state cache.
    56  	lru *lru.Cache[cacheEntryKey, *cacheEntry]
    57  }
    58  
    59  // NewCache creates a new Coordinator instance that wraps another Coordinator
    60  // instance with a cache that retains the latest remote Coordinator state in a
    61  // client-side LRU cache.
    62  //
    63  // If size is <= 0, DefaultSize will be used.
    64  // If expiration is <= 0, DefaultExpiration will be used.
    65  func NewCache(c Coordinator, size int, expiration time.Duration) Coordinator {
    66  	if size <= 0 {
    67  		size = DefaultSize
    68  	}
    69  	if expiration <= 0 {
    70  		expiration = DefaultExpiration
    71  	}
    72  
    73  	return &cache{
    74  		Coordinator: c,
    75  		expiration:  expiration,
    76  		lru:         lru.New[cacheEntryKey, *cacheEntry](size),
    77  	}
    78  }
    79  
    80  func (c *cache) getCacheEntry(ctx context.Context, k cacheEntryKey) (*cacheEntry, bool) {
    81  	// Get the cacheEntry from our cache. If it is expired or doesn't exist,
    82  	// generate a new cache entry for this key.
    83  	created := false
    84  	entry, _ := c.lru.Mutate(ctx, k, func(it *lru.Item[*cacheEntry]) *lru.Item[*cacheEntry] {
    85  		// Don't replace an existing entry, unless it has an error or has expired.
    86  		if it != nil {
    87  			return it
    88  		}
    89  
    90  		created = true
    91  		return &lru.Item[*cacheEntry]{
    92  			Value: &cacheEntry{
    93  				cacheEntryKey: k,
    94  				terminalIndex: -1,
    95  			},
    96  			Exp: c.expiration,
    97  		}
    98  	})
    99  	return entry, created
   100  }
   101  
   102  // RegisterStream invokes the wrapped Coordinator's RegisterStream method and
   103  // caches the result. It uses a Promise to cause all simultaneous identical
   104  // RegisterStream requests to block on a single RPC.
   105  func (c *cache) RegisterStream(ctx context.Context, st *LogStreamState, desc []byte) (*LogStreamState, error) {
   106  	entry, created := c.getCacheEntry(ctx, cacheEntryKey{
   107  		project: st.Project,
   108  		path:    st.Path,
   109  	})
   110  	tsCache.Add(ctx, 1, !created)
   111  
   112  	st, err := entry.registerStream(ctx, c.Coordinator, *st, desc)
   113  	if err != nil {
   114  		log.WithError(err).Errorf(ctx, "Error retrieving stream state.")
   115  		return nil, err
   116  	}
   117  
   118  	return st, nil
   119  }
   120  
   121  func (c *cache) TerminateStream(ctx context.Context, r *TerminateRequest) error {
   122  	entry, _ := c.getCacheEntry(ctx, cacheEntryKey{
   123  		project: r.Project,
   124  		path:    r.Path,
   125  	})
   126  	return entry.terminateStream(ctx, c.Coordinator, *r)
   127  }
   128  
   129  // cacheEntryKey is the LRU key for a cacheEntry.
   130  type cacheEntryKey struct {
   131  	project string
   132  	path    types.StreamPath
   133  }
   134  
   135  // cacheEntry is a cached state for a specific log stream.
   136  //
   137  // It contains promises for each singleton operation: one for stream
   138  // registration (registerP), and one for stream termination (terminateP).
   139  //
   140  // There are three states to promise evaluation:
   141  //   - If the promise is nil, it will be populated. Any concurrent requests
   142  //     will block pending population (via lock) and will obtain a reference to
   143  //     the populated promise.
   144  //   - If the promise succeeded, or failed non-transiently, its result will be
   145  //     retained and all subsequent calls will see this result.
   146  //   - If the promise failed transiently, it will be set to nil. This will cause
   147  //     the next caller to generate a new promise (retry). Concurrent users of the
   148  //     transiently-failing Promise will all receive a transient error.
   149  type cacheEntry struct {
   150  	sync.Mutex
   151  	cacheEntryKey
   152  
   153  	// terminalIndex is the cached terminal index. Valid if >= 0.
   154  	//
   155  	// If a TerminateStream RPC succeeds, we will use this value in our returned
   156  	// RegisterStream state.
   157  	terminalIndex types.MessageIndex
   158  
   159  	// registerP is a Promise that is blocking pending stream registration.
   160  	// Upon successful resolution, it will contain a *LogStreamState.
   161  	registerP *promise.Promise
   162  	// terminateP is a Promise that is blocking pending stream termination.
   163  	// Upon successful resolution, it will contain a nil result with no error.
   164  	terminateP *promise.Promise
   165  }
   166  
   167  // registerStream performs a RegisterStream Coordinator RPC.
   168  func (ce *cacheEntry) registerStream(ctx context.Context, coord Coordinator, st LogStreamState, desc []byte) (*LogStreamState, error) {
   169  	// Initialize the registration Promise, if one is not defined.
   170  	//
   171  	// While locked, load the current registration promise and the local
   172  	// terminal index value.
   173  	var (
   174  		p    *promise.Promise
   175  		tidx types.MessageIndex
   176  	)
   177  	ce.withLock(func() {
   178  		if ce.registerP == nil {
   179  			ce.registerP = promise.NewDeferred(func(ctx context.Context) (any, error) {
   180  				st, err := coord.RegisterStream(ctx, &st, desc)
   181  				if err == nil {
   182  					// If the remote state has a terminal index, retain it locally.
   183  					ce.loadRemoteTerminalIndex(st.TerminalIndex)
   184  					return st, nil
   185  				}
   186  
   187  				return nil, err
   188  			})
   189  		}
   190  
   191  		p, tidx = ce.registerP, ce.terminalIndex
   192  	})
   193  
   194  	// Resolve our registration Promise.
   195  	remoteStateIface, err := p.Get(ctx)
   196  	if err != nil {
   197  		// If the promise failed transiently, clear it so that subsequent callers
   198  		// will regenerate a new promise. ONLY clear it if it it is the same
   199  		// promise, as different callers may have already cleared/rengerated it.
   200  		if transient.Tag.In(err) {
   201  			ce.withLock(func() {
   202  				if ce.registerP == p {
   203  					ce.registerP = nil
   204  				}
   205  			})
   206  		}
   207  		return nil, err
   208  	}
   209  	remoteState := remoteStateIface.(*LogStreamState)
   210  
   211  	// If our remote state doesn't include a terminal index and our local state
   212  	// has recorded a successful remote terminal index, return a copy of the
   213  	// remote state with the remote terminal index added.
   214  	if remoteState.TerminalIndex < 0 && tidx >= 0 {
   215  		remoteStateCopy := *remoteState
   216  		remoteStateCopy.TerminalIndex = tidx
   217  		remoteState = &remoteStateCopy
   218  	}
   219  	return remoteState, nil
   220  }
   221  
   222  // terminateStream performs a TerminateStream Coordinator RPC.
   223  func (ce *cacheEntry) terminateStream(ctx context.Context, coord Coordinator, tr TerminateRequest) error {
   224  	// Initialize the termination Promise if one is not defined. Also, grab our
   225  	// cached remote terminal index.
   226  	var (
   227  		p    *promise.Promise
   228  		tidx types.MessageIndex = -1
   229  	)
   230  	ce.withLock(func() {
   231  		if ce.terminateP == nil {
   232  			// We're creating a new promise, so our tr's TerminalIndex will be set.
   233  			ce.terminateP = promise.NewDeferred(func(ctx context.Context) (any, error) {
   234  				// Execute our TerminateStream RPC. If successful, retain the successful
   235  				// terminal index locally.
   236  				err := coord.TerminateStream(ctx, &tr)
   237  				if err == nil {
   238  					// Note that this happens within the Promise body, so this will not
   239  					// conflict with our outer lock.
   240  					ce.loadRemoteTerminalIndex(tr.TerminalIndex)
   241  				}
   242  				return nil, err
   243  			})
   244  		}
   245  
   246  		p, tidx = ce.terminateP, ce.terminalIndex
   247  	})
   248  
   249  	// If the stream is known to be terminated on the Coordinator side, we don't
   250  	// need to issue another request.
   251  	if tidx >= 0 {
   252  		if tr.TerminalIndex != tidx {
   253  			// Not much we can do here, and this probably will never happen, but let's
   254  			// log it if it does.
   255  			log.Fields{
   256  				"requestIndex": tr.TerminalIndex,
   257  				"cachedIndex":  tidx,
   258  			}.Warningf(ctx, "Request terminal index doesn't match cached value.")
   259  		}
   260  		return nil
   261  	}
   262  
   263  	// Resolve our termination Promise.
   264  	if _, err := p.Get(ctx); err != nil {
   265  		// If this is a transient error, delete this Promise so future termination
   266  		// attempts will retry for this stream.
   267  		if transient.Tag.In(err) {
   268  			ce.withLock(func() {
   269  				if ce.terminateP == p {
   270  					ce.terminateP = nil
   271  				}
   272  			})
   273  		}
   274  		return err
   275  	}
   276  	return nil
   277  }
   278  
   279  // loadRemoteTerminalIndex updates our cached remote terminal index with tidx,
   280  // if tidx >= 0 and a remote terminal index is not already cached.
   281  //
   282  // This is executed in the bodies of the register and terminate Promises if they
   283  // receive a terminal index remotely.
   284  func (ce *cacheEntry) loadRemoteTerminalIndex(tidx types.MessageIndex) {
   285  	// Never load an invalid remote terminal index.
   286  	if tidx < 0 {
   287  		return
   288  	}
   289  
   290  	// Load the remote terminal index if one isn't already loaded.
   291  	ce.withLock(func() {
   292  		if ce.terminalIndex < 0 {
   293  			ce.terminalIndex = tidx
   294  		}
   295  	})
   296  }
   297  
   298  func (ce *cacheEntry) withLock(f func()) {
   299  	ce.Lock()
   300  	defer ce.Unlock()
   301  	f()
   302  }