sigs.k8s.io/prow@v0.0.0-20240503223140-c5e374dc7eb1/pkg/config/cache.go

/*
Copyright 2021 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package config

import (
	"encoding/json"
	"errors"
	"fmt"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/sirupsen/logrus"

	"sigs.k8s.io/prow/pkg/cache"
	"sigs.k8s.io/prow/pkg/git/v2"
)

// Overview
//
// Consider the expensive function prowYAMLGetter(), which needs to use a Git
// client, walk the filesystem path, etc. To speed things up, we save results of
// this function into a cache named InRepoConfigCache.
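//
// A rough sketch of the intended usage (the names cfgAgent, gitFactory,
// baseSHAGetter, and headSHAGetter below are placeholders for illustration,
// not identifiers defined in this package):
//
//	inRepoCache, err := NewInRepoConfigCache(1000, cfgAgent, gitFactory)
//	if err != nil {
//		// handle error
//	}
//	// The first call for a given (identifier, baseSHA, headSHAs) combination
//	// constructs the ProwYAML by calling out to Git; later calls with the
//	// same SHAs are served from the LRU cache.
//	presubmits, err := inRepoCache.GetPresubmits("org/repo", "main", baseSHAGetter, headSHAGetter)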

var inRepoConfigCacheMetrics = struct {
	// How many times have we looked up an item in this cache?
	lookups *prometheus.CounterVec
	// Of the lookups, how many times did we get a cache hit?
	hits *prometheus.CounterVec
	// Of the lookups, how many times did we have to construct a cache value
	// ourselves (cache was useless for this lookup)?
	misses *prometheus.CounterVec
	// How many cache key evictions were performed by the underlying LRU
	// algorithm outside of our control?
	evictionsForced *prometheus.CounterVec
	// How many times have we tried to remove a cached key because its value
	// construction failed?
	evictionsManual *prometheus.CounterVec
	// How many entries are in the cache?
	cacheUsageSize *prometheus.GaugeVec
	// How long does it take for GetProwYAML() to run?
	getProwYAMLDuration *prometheus.HistogramVec
}{
	lookups: prometheus.NewCounterVec(prometheus.CounterOpts{
		Name: "inRepoConfigCache_lookups",
		Help: "Count of cache lookups by org and repo.",
	}, []string{
		"org",
		"repo",
	}),
	hits: prometheus.NewCounterVec(prometheus.CounterOpts{
		Name: "inRepoConfigCache_hits",
		Help: "Count of cache lookup hits by org and repo.",
	}, []string{
		"org",
		"repo",
	}),
	misses: prometheus.NewCounterVec(prometheus.CounterOpts{
		Name: "inRepoConfigCache_misses",
		Help: "Count of cache lookup misses by org and repo.",
	}, []string{
		"org",
		"repo",
	}),
	// Every time we evict a key, record it as a Prometheus metric. This way, we
	// can monitor how frequently evictions are happening (if it's happening too
	// frequently, it means that our cache size is too small).
	evictionsForced: prometheus.NewCounterVec(prometheus.CounterOpts{
		Name: "inRepoConfigCache_evictions_forced",
		Help: "Count of forced cache evictions (due to LRU algorithm) by org and repo.",
	}, []string{
		"org",
		"repo",
	}),
	evictionsManual: prometheus.NewCounterVec(prometheus.CounterOpts{
		Name: "inRepoConfigCache_evictions_manual",
		Help: "Count of manual cache evictions (due to faulty value construction) by org and repo.",
	}, []string{
		"org",
		"repo",
	}),
	cacheUsageSize: prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Name: "inRepoConfigCache_cache_usage_size",
		Help: "Size of the cache (how many entries it is holding) by org and repo.",
	}, []string{
		"org",
		"repo",
	}),
	getProwYAMLDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{
		Name:    "inRepoConfigCache_GetProwYAML_duration",
		Help:    "Histogram of seconds spent retrieving the ProwYAML (inrepoconfig), by org and repo.",
		Buckets: []float64{0.5, 1, 2, 5, 10, 20, 30, 60, 120, 180, 300, 600},
	}, []string{
		"org",
		"repo",
	}),
}

func init() {
	prometheus.MustRegister(inRepoConfigCacheMetrics.lookups)
	prometheus.MustRegister(inRepoConfigCacheMetrics.hits)
	prometheus.MustRegister(inRepoConfigCacheMetrics.misses)
	prometheus.MustRegister(inRepoConfigCacheMetrics.evictionsForced)
	prometheus.MustRegister(inRepoConfigCacheMetrics.evictionsManual)
	prometheus.MustRegister(inRepoConfigCacheMetrics.cacheUsageSize)
	prometheus.MustRegister(inRepoConfigCacheMetrics.getProwYAMLDuration)
}

func mkCacheEventCallback(counterVec *prometheus.CounterVec) cache.EventCallback {
	callback := func(key interface{}) {
		org, repo, err := keyToOrgRepo(key)
		if err != nil {
			return
		}
		counterVec.WithLabelValues(org, repo).Inc()
	}

	return callback
}

// The InRepoConfigCache needs a Config agent client. Here we assert that the
// Agent type satisfies the prowConfigAgentClient interface, which only needs a
// Config() method for retrieving the current Config. Tests can use a fake
// Config agent instead of the real one.
var _ prowConfigAgentClient = (*Agent)(nil)

type prowConfigAgentClient interface {
	Config() *Config
}

// InRepoConfigCache is the user-facing cache. It wraps the generic LRUCache,
// handling the type assertions into and out of it (the LRUCache itself only
// deals in empty interfaces).
type InRepoConfigCache struct {
	*cache.LRUCache
	configAgent prowConfigAgentClient
	gitClient   git.ClientFactory
}

// NewInRepoConfigCache creates a new LRU cache for ProwYAML values, where the keys
// are CacheKeys (that is, JSON strings) and values are pointers to ProwYAMLs.
func NewInRepoConfigCache(
	size int,
	configAgent prowConfigAgentClient,
	gitClientFactory git.ClientFactory) (*InRepoConfigCache, error) {

	if gitClientFactory == nil {
		return nil, fmt.Errorf("InRepoConfigCache requires a non-nil gitClientFactory")
	}

	lookupsCallback := mkCacheEventCallback(inRepoConfigCacheMetrics.lookups)
	hitsCallback := mkCacheEventCallback(inRepoConfigCacheMetrics.hits)
	missesCallback := mkCacheEventCallback(inRepoConfigCacheMetrics.misses)
	forcedEvictionsCallback := func(key interface{}, _ interface{}) {
		org, repo, err := keyToOrgRepo(key)
		if err != nil {
			return
		}
		inRepoConfigCacheMetrics.evictionsForced.WithLabelValues(org, repo).Inc()
	}
	manualEvictionsCallback := mkCacheEventCallback(inRepoConfigCacheMetrics.evictionsManual)

	callbacks := cache.Callbacks{
		LookupsCallback:         lookupsCallback,
		HitsCallback:            hitsCallback,
		MissesCallback:          missesCallback,
		ForcedEvictionsCallback: forcedEvictionsCallback,
		ManualEvictionsCallback: manualEvictionsCallback,
	}

	lruCache, err := cache.NewLRUCache(size, callbacks)
	if err != nil {
		return nil, err
	}

	// This records all OrgRepos we've seen so far during the lifetime of the
	// process. The main purpose is to allow reporting of 0 counts for OrgRepos
	// whose keys have been evicted by the lruCache.
	seenOrgRepos := make(map[OrgRepo]int)

	cacheSizeMetrics := func() {
		// Hold the cache's lock while we walk its keys.
		lruCache.Mutex.Lock()
		defer lruCache.Mutex.Unlock()
		// Count the keys currently in the cache for each unique orgRepo
		// combination we've seen so far.
		for _, key := range lruCache.Keys() {
			org, repo, err := keyToOrgRepo(key)
			if err != nil {
				// This should only happen if we are deliberately using things
				// other than a CacheKey as the key.
				logrus.Warnf("programmer error: could not report cache size metrics for a key entry: %v", err)
				continue
			}
			seenOrgRepos[OrgRepo{org, repo}]++
		}
		// For every org/repo we have ever seen, report how many keys it
		// currently has in the cache.
		for orgRepo, count := range seenOrgRepos {
			inRepoConfigCacheMetrics.cacheUsageSize.WithLabelValues(
				orgRepo.Org, orgRepo.Repo).Set(float64(count))
			// Reset the counter back down to 0; if every key for this orgRepo
			// has been evicted by the time of the next interval, we still want
			// to report a count of 0.
			seenOrgRepos[orgRepo] = 0
		}
	}

	go func() {
		for {
			cacheSizeMetrics()
			time.Sleep(30 * time.Second)
		}
	}()

	cache := &InRepoConfigCache{
		lruCache,
		// Used to default the retrieved ProwYAML values against the latest Config.
		configAgent,
		// Used to construct ProwYAML values on cache misses (by calling out to
		// Git).
		gitClientFactory,
	}

	return cache, nil
}

// CacheKey acts as a key to the InRepoConfigCache. We construct it by marshaling
// CacheKeyParts into a JSON string.
type CacheKey string

// CacheKeyParts is a struct because we want to keep the various components
// that make up the key separate to help keep tests readable. Because the
// HeadSHAs field is a slice, the overall CacheKeyParts struct is not comparable
// and cannot be used directly as a map key. Instead we marshal it to JSON
// first, then convert its type to CacheKey.
//
// Users should take care to ensure that HeadSHAs remains stable (order
// matters, because it changes the JSON encoding and therefore the key).
type CacheKeyParts struct {
	Identifier string   `json:"identifier"`
	BaseSHA    string   `json:"baseSHA"`
	HeadSHAs   []string `json:"headSHAs"`
}

// CacheKey converts a CacheKeyParts object into a JSON string (to be used as a
// CacheKey).
func (kp *CacheKeyParts) CacheKey() (CacheKey, error) {
	data, err := json.Marshal(kp)
	if err != nil {
		return "", err
	}

	return CacheKey(data), nil
}
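
// For illustration only (the SHAs below are made up), a CacheKeyParts value like
//
//	CacheKeyParts{
//		Identifier: "org/repo",
//		BaseSHA:    "abc123",
//		HeadSHAs:   []string{"def456", "789abc"},
//	}
//
// marshals into the CacheKey
//
//	{"identifier":"org/repo","baseSHA":"abc123","headSHAs":["def456","789abc"]}
//
// which keyToOrgRepo() can later parse to recover the org and repo labels used
// by the Prometheus metrics.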

func (cacheKey CacheKey) toCacheKeyParts() (CacheKeyParts, error) {
	kp := CacheKeyParts{}
	if err := json.Unmarshal([]byte(cacheKey), &kp); err != nil {
		return kp, err
	}
	return kp, nil
}

func keyToOrgRepo(key interface{}) (string, string, error) {
	cacheKey, ok := key.(CacheKey)
	if !ok {
		return "", "", fmt.Errorf("key is not a CacheKey")
	}

	kp, err := cacheKey.toCacheKeyParts()
	if err != nil {
		return "", "", err
	}

	org, repo, err := SplitRepoName(kp.Identifier)
	if err != nil {
		return "", "", err
	}

	return org, repo, nil
}

// GetPresubmits uses a cache lookup to get the *ProwYAML value (cache hit),
// instead of computing it from scratch (cache miss). It also stores the
// *ProwYAML into the cache if there is a cache miss.
func (cache *InRepoConfigCache) GetPresubmits(identifier, baseBranch string, baseSHAGetter RefGetter, headSHAGetters ...RefGetter) ([]Presubmit, error) {
	prowYAML, err := cache.GetProwYAML(identifier, baseBranch, baseSHAGetter, headSHAGetters...)
	if err != nil {
		return nil, err
	}

	c := cache.configAgent.Config()
	return append(c.GetPresubmitsStatic(identifier), prowYAML.Presubmits...), nil
}

// GetPostsubmits uses a cache lookup to get the *ProwYAML value (cache hit),
// instead of computing it from scratch (cache miss). It also stores the
// *ProwYAML into the cache if there is a cache miss.
func (cache *InRepoConfigCache) GetPostsubmits(identifier, baseBranch string, baseSHAGetter RefGetter, headSHAGetters ...RefGetter) ([]Postsubmit, error) {
	prowYAML, err := cache.GetProwYAML(identifier, baseBranch, baseSHAGetter, headSHAGetters...)
	if err != nil {
		return nil, err
	}

	c := cache.configAgent.Config()
	return append(c.GetPostsubmitsStatic(identifier), prowYAML.Postsubmits...), nil
}

// GetProwYAML returns the ProwYAML value stored in the InRepoConfigCache,
// defaulted and validated against the current Config.
func (cache *InRepoConfigCache) GetProwYAML(identifier, baseBranch string, baseSHAGetter RefGetter, headSHAGetters ...RefGetter) (*ProwYAML, error) {
	prowYAML, err := cache.GetProwYAMLWithoutDefaults(identifier, baseBranch, baseSHAGetter, headSHAGetters...)
	if err != nil {
		return nil, err
	}

	c := cache.configAgent.Config()

	// Create a new ProwYAML object based on what we retrieved from the cache.
	// This way, the act of defaulting values does not modify the elements in
	// the Presubmits and Postsubmits slices (recall that slices are just
	// references to areas of memory). This is important for InRepoConfigCache to
	// behave correctly; otherwise when we default the cached ProwYAML values,
	// the cached item becomes mutated, affecting future cache lookups.
	newProwYAML := prowYAML.DeepCopy()
	if err := DefaultAndValidateProwYAML(c, newProwYAML, identifier); err != nil {
		return nil, err
	}

	return newProwYAML, nil
}

// GetProwYAMLWithoutDefaults is like GetProwYAML, but returns the ProwYAML
// value from the cache without defaulting or validating it against the current
// Config.
func (cache *InRepoConfigCache) GetProwYAMLWithoutDefaults(identifier, baseBranch string, baseSHAGetter RefGetter, headSHAGetters ...RefGetter) (*ProwYAML, error) {
	timeGetProwYAML := time.Now()
	defer func() {
		orgRepo := NewOrgRepo(identifier)
		inRepoConfigCacheMetrics.getProwYAMLDuration.WithLabelValues(orgRepo.Org, orgRepo.Repo).Observe(time.Since(timeGetProwYAML).Seconds())
	}()

	c := cache.configAgent.Config()

	prowYAML, err := cache.getProwYAML(c.getProwYAML, identifier, baseBranch, baseSHAGetter, headSHAGetters...)
	if err != nil {
		return nil, err
	}

	return prowYAML, nil
}

// GetInRepoConfig just wraps around GetProwYAML().
func (cache *InRepoConfigCache) GetInRepoConfig(identifier, baseBranch string, baseSHAGetter RefGetter, headSHAGetters ...RefGetter) (*ProwYAML, error) {
	return cache.GetProwYAML(identifier, baseBranch, baseSHAGetter, headSHAGetters...)
}

// valConstructorHelper is called to construct ProwYAML values inside the cache.
type valConstructorHelper func(
	gitClient git.ClientFactory,
	identifier string,
	baseBranch string,
	baseSHAGetter RefGetter,
	headSHAGetters ...RefGetter,
) (*ProwYAML, error)

// getProwYAML performs a lookup of previously-calculated *ProwYAML objects. The
// 'valConstructorHelper' is used in two ways. First, it is used by the caching
// mechanism to lazily generate the value only when it is required (otherwise,
// if all threads had to generate the value, it would defeat the purpose of the
// cache in the first place). Second, it makes it easier to test this function,
// because unit tests can provide their own function for constructing a
// *ProwYAML object (instead of needing to create an actual Git repo, etc.).
func (cache *InRepoConfigCache) getProwYAML(
	valConstructorHelper valConstructorHelper,
	identifier string,
	baseBranch string,
	baseSHAGetter RefGetter,
	headSHAGetters ...RefGetter) (*ProwYAML, error) {

	if identifier == "" {
		return nil, errors.New("no identifier for repo given")
	}

	// Abort if InRepoConfig is not enabled for this identifier (org/repo).
	// It's important that we short-circuit here __before__ calling cache.get(),
	// because we do NOT want to add an empty &ProwYAML{} value to the cache:
	// not only is it useless, but if the underlying cache is full, adding it
	// may evict an older (useful) entry.
	c := cache.configAgent.Config()
	if !c.InRepoConfigEnabled(identifier) {
		logrus.WithField("identifier", identifier).Debug("Inrepoconfig not enabled, skipping getting prow yaml.")
		return &ProwYAML{}, nil
	}

	baseSHA, headSHAs, err := GetAndCheckRefs(baseSHAGetter, headSHAGetters...)
	if err != nil {
		return nil, err
	}

	valConstructor := func() (interface{}, error) {
		return valConstructorHelper(cache.gitClient, identifier, baseBranch, baseSHAGetter, headSHAGetters...)
	}

	got, err := cache.get(CacheKeyParts{Identifier: identifier, BaseSHA: baseSHA, HeadSHAs: headSHAs}, valConstructor)
	if err != nil {
		return nil, err
	}

	return got, nil
}
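
// For illustration only: because getProwYAML takes its value constructor as a
// parameter, a unit test can bypass Git entirely by supplying a fake helper.
// The names fakeProwYAML, fakeHelper, ircCache, and baseSHAGetter below are
// hypothetical placeholders, not identifiers defined in this package:
//
//	fakeProwYAML := &ProwYAML{}
//	fakeHelper := func(_ git.ClientFactory, _, _ string, _ RefGetter, _ ...RefGetter) (*ProwYAML, error) {
//		return fakeProwYAML, nil
//	}
//	got, err := ircCache.getProwYAML(fakeHelper, "org/repo", "main", baseSHAGetter)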

// get is a type assertion wrapper around the values retrieved from the inner
// LRUCache object (which only understands empty interfaces for both keys and
// values). It wraps around the low-level GetOrAdd function. Users are expected
// to add their own get method for their own cached value.
func (cache *InRepoConfigCache) get(
	keyParts CacheKeyParts,
	valConstructor cache.ValConstructor) (*ProwYAML, error) {

	key, err := keyParts.CacheKey()
	if err != nil {
		return nil, fmt.Errorf("converting CacheKeyParts to CacheKey: %v", err)
	}

	now := time.Now()
	val, cacheHit, err := cache.GetOrAdd(key, valConstructor)
	if err != nil {
		return nil, err
	}
	logrus.WithFields(logrus.Fields{
		"identifier":        keyParts.Identifier,
		"key":               key,
		"duration(seconds)": time.Since(now).Seconds(),
		"cache_hit":         cacheHit,
	}).Debug("Duration for resolving inrepoconfig cache.")

	prowYAML, ok := val.(*ProwYAML)
	if ok {
		return prowYAML, nil
	}

	// Somehow, the value retrieved with GetOrAdd has the wrong type. This can
	// happen if some other function modified the cache and put in the wrong
	// type. Ultimately, this is a price we pay for using a cache library that
	// uses "interface{}" for the type of its items.
	err = fmt.Errorf("Programmer error: expected value type '*config.ProwYAML', got '%T'", val)
	logrus.Error(err)
	return nil, err
}