github.com/zppinho/prow@v0.0.0-20240510014325-1738badeb017/pkg/ghcache/coalesce.go

/*
Copyright 2018 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package ghcache

import (
	"bufio"
	"bytes"
	"net/http"
	"net/http/httputil"
	"strconv"
	"sync"
	"time"

	"github.com/sirupsen/logrus"

	"sigs.k8s.io/prow/pkg/github/ghmetrics"
)

// requestCoalescer allows concurrent requests for the same URI to share a
// single upstream request and response. The first request for a given URI is
// processed and a response is received (via "requestExecutor"). Meanwhile, any
// other requests for the same URI Wait(). When the first request is done
// processing (we receive a real response), we copy the original request's
// response to the subscribed threads before letting them all finish. The
// "cache" map is our short-term memory for knowing which request is the
// "first" one of its kind.
type requestCoalescer struct {
	sync.Mutex
	cache map[string]*firstRequest

	// requestExecutor is anything that can resolve a request by executing a
	// single HTTP transaction, returning a Response for the provided Request.
	// The coalescer uses this to talk to the actual proxied backend. Using an
	// interface here allows us to mock out a fake backend server's response to
	// the request.
	requestExecutor http.RoundTripper

	hasher ghmetrics.Hasher
}
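
// A minimal construction sketch (hypothetical wiring, not this package's
// actual setup code): the coalescer is just an http.RoundTripper that fronts
// another http.RoundTripper, so it composes like any other transport.
//
//	coalescer := &requestCoalescer{
//		cache:           map[string]*firstRequest{},
//		requestExecutor: http.DefaultTransport, // or a fake backend in tests
//		hasher:          myHasher,              // any ghmetrics.Hasher implementation (hypothetical)
//	}
//	resp, err := coalescer.RoundTrip(req)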

// firstRequest is where we store the coalesced requests' actual response. It
// is named firstRequest because only the first one (which also creates the
// entry in the cache) will actually be resolved by being processed over the
// network; all subsequent requests that match the first request's URL will end
// up waiting for this first request to finish. After the first request is
// processed, the "resp" field will be populated, and subsequent requests will
// simply reuse the same "resp" body. Note that if the first request fails,
// then all subsequent requests will fail together.
type firstRequest struct {
	*sync.Cond

	// Are there any threads that are "subscribed" to this first request's
	// response?
	subscribers bool
	resp        []byte
	err         error
}
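
// The sync.Cond handshake used below, in miniature (a sketch with hypothetical
// names, not code from this package): subscribers Wait() on the condition
// while the main thread fills in the shared result and then Broadcast()s.
//
//	// Subscriber side:
//	first.L.Lock()
//	first.subscribers = true
//	first.Wait() // atomically releases L and sleeps until Broadcast()
//	first.L.Unlock()
//	use(first.resp, first.err)
//
//	// Main-thread side, once the real response arrives:
//	first.L.Lock()
//	first.resp, first.err = respBytes, rtErr
//	first.Broadcast() // wake every subscriber
//	first.L.Unlock()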

// RoundTrip coalesces concurrent GET requests for the same URI by blocking
// the later requests until the first request returns and then sharing the
// response between all requests.
//
// Note: deadlock shouldn't be possible because the map lock is always
// acquired before the firstRequest lock when both locks are to be held, and
// we never hold multiple firstRequest locks.
func (coalescer *requestCoalescer) RoundTrip(req *http.Request) (*http.Response, error) {
	// Only coalesce GET requests.
	if req.Method != http.MethodGet {
		resp, err := coalescer.requestExecutor.RoundTrip(req)
		var tokenBudgetName string
		if val := req.Header.Get(TokenBudgetIdentifierHeader); val != "" {
			tokenBudgetName = val
		} else {
			tokenBudgetName = coalescer.hasher.Hash(req)
		}
		collectMetrics(ModeSkip, req, resp, tokenBudgetName)
		return resp, err
	}

	var cacheMode = ModeError
	resp, err := func() (*http.Response, error) {
		key := req.URL.String()
		coalescer.Lock()
		firstReq, ok := coalescer.cache[key]
		// Note that we cannot Unlock() the coalescer immediately after the
		// cache lookup, because that could allow multiple threads to become
		// the "firstReq" creator (main) thread. This is why we only Unlock()
		// the coalescer __after__ creating the cache entry.

		// Earlier request in flight. Wait for its response, which will be
		// received by a different thread (specifically, the original thread
		// that created the firstReq object --- let's call this the "main"
		// thread for simplicity).
		if ok {
			// If the request that we're trying to process has a body, don't
			// forget to close it. Normally, if we performed the HTTP
			// roundtrip ourselves, we wouldn't need to do this because the
			// RoundTripper would do it on its own. However, we'll never call
			// RoundTrip() on this request ourselves because we're going to be
			// lazy and just wait for the main thread to do it for us. So we
			// need to close the body directly. See
			// https://cs.opensource.google/go/go/+/refs/tags/go1.17.1:src/net/http/transport.go;l=510
			// and
			// https://cs.opensource.google/go/go/+/refs/tags/go1.17.1:src/net/http/request.go;drc=refs%2Ftags%2Fgo1.17.1;l=1408
			// for an example.
			if req.Body != nil {
				defer req.Body.Close() // Since we won't pass the request on, we must close it.
			}

			// Let the main thread know that there is at least one subscriber
			// (us). We do this by setting the firstReq.subscribers flag. Note
			// that we first grab the inner firstReq lock before unlocking the
			// outer coalescer. This order is important as it guarantees that
			// no other thread will delete the cache entry (firstReq) before
			// we're done waiting for it.
			//
			// We need to unlock the coalescer so that other threads can read
			// from it (and decide whether to wait or create a new cache entry).
			// That is, the coalescer itself should never be blocked by
			// subscribed threads.
			firstReq.L.Lock()
			coalescer.Unlock()
			firstReq.subscribers = true

			// The documentation for Wait() says:
			// "Because c.L is not locked when Wait first resumes, the caller typically
			// cannot assume that the condition is true when Wait returns. Instead, the
			// caller should Wait in a loop."
			// This does not apply to this use of Wait() because the condition we are
			// waiting for remains true once it becomes true. This lets us avoid the
			// normal check to see if the condition has switched back to false between
			// the signal being sent and this thread acquiring the lock.
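			// For reference, the canonical pattern from the sync.Cond
			// documentation (deliberately not needed here) would be:
			//
			//	for !done { // re-check the condition after every wakeup
			//		firstReq.Wait()
			//	}
			//
			// where "done" is a hypothetical condition flag; our condition
			// never flips back to false, so a single Wait() suffices.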

			// Wait() atomically unlocks firstReq.L (so that the thread that
			// __did__ create the first request can actually process it) and
			// suspends execution of this thread until that is done.
			firstReq.Wait()

			// Because firstReq.Wait() re-locks firstReq.L before returning,
			// release the lock now, because we won't be modifying anything
			// inside firstRequest. Anyway, if we're here it means that we've
			// been woken by a Broadcast() from the main thread.
			firstReq.L.Unlock()

			if firstReq.err != nil {
				// Don't log the error ourselves, because it will be logged once
				// by the main thread. This avoids spamming the logs with the
				// same error.
				return nil, firstReq.err
			}

			// Copy firstReq's response into our own response. We didn't have
			// to process the request ourselves! Wasn't that easy?
			resp, err := http.ReadResponse(bufio.NewReader(bytes.NewBuffer(firstReq.resp)), nil)
			if err != nil {
				logrus.WithField("cache-key", key).WithError(err).Error("Error loading response.")
				return nil, err
			}

			cacheMode = ModeCoalesced
			return resp, nil
		}

		// No earlier (first) request in flight yet. Create a new firstRequest
		// object and process it ourselves.
		firstReq = &firstRequest{Cond: sync.NewCond(&sync.Mutex{})}
		coalescer.cache[key] = firstReq

		// Unlock the coalescer so that it doesn't block on this particular
		// request. This allows subsequent requests for the same URL to become
		// subscribers to this main one.
		coalescer.Unlock()

		// Actually process the request and get a response.
		resp, err := coalescer.requestExecutor.RoundTrip(req)
		// Real response received. Remove this firstRequest from the cache
		// __before__ waking any subscribed threads to let them copy the
		// response we got. This order is important. If we deleted the cache
		// entry __after__ waking the subscribed threads, the following race
		// condition could happen:
		//
		//  1. firstReq creator thread wakes subscribed threads
		//  2. subscribed threads begin copying data from the firstReq struct
		//  3. *NEW* subscribers get created, because the cached key is still there
		//  4. cached key is finally deleted
		//  5. firstReq creator thread from Step 1 dies
		//  6. subscribed threads from Step 3 wait forever
		//     (a memory leak, not to mention a request timeout for all of them)
		//
		// Deleting the cache key now also allows a new firstRequest{} object to
		// be created (and the whole cycle repeated again) by another set of
		// requests in flight, if any.
		coalescer.Lock()
		delete(coalescer.cache, key)
		coalescer.Unlock()

		// Write response data into firstReq for all subscribers to see. But
		// only bother writing into firstReq if we have subscribers at all
		// (because otherwise no other thread will use it anyway).
		firstReq.L.Lock()
		if firstReq.subscribers {
			if err != nil {
				firstReq.resp, firstReq.err = nil, err
			} else {
				// Copy the response into firstReq.resp before letting
				// subscribers know about it.
				firstReq.resp, firstReq.err = httputil.DumpResponse(resp, true)
			}

			// Wake up all subscribed threads. They will all read firstReq.resp
			// to construct their own (identical) HTTP Responses, based on the
			// contents of firstReq.
			firstReq.Broadcast()
		}
		firstReq.L.Unlock()

		// If RoundTrip() encountered an error, log it.
		if err != nil {
			logrus.WithField("cache-key", key).WithError(err).Warn("Error from cache transport layer.")
			return nil, err
		}

		// Default to ModeMiss (that is, the response was not in the cache, so
		// we had to proxy the request and cache the response), unless
		// cacheResponseMode() can glean a more specific mode from the
		// response headers.
		cacheMode = cacheResponseMode(resp.Header)

		return resp, nil
	}()

	var tokenBudgetName string
	if val := req.Header.Get(TokenBudgetIdentifierHeader); val != "" {
		tokenBudgetName = val
	} else {
		tokenBudgetName = coalescer.hasher.Hash(req)
	}

	collectMetrics(cacheMode, req, resp, tokenBudgetName)
	return resp, err
}
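
// A usage sketch (hypothetical, package-internal wiring; the real proxy setup
// lives elsewhere in this package): because requestCoalescer satisfies
// http.RoundTripper, it can serve as an http.Client transport, and concurrent
// GETs for the same URL that arrive while the first is still in flight share
// that first request's upstream response.
//
//	client := &http.Client{Transport: coalescer}
//	var wg sync.WaitGroup
//	for i := 0; i < 10; i++ {
//		wg.Add(1)
//		go func() {
//			defer wg.Done()
//			resp, err := client.Get(someURL) // someURL is a placeholder
//			if err == nil {
//				resp.Body.Close() // each caller gets its own copy of the shared response
//			}
//		}()
//	}
//	wg.Wait()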

// collectMetrics records cache request metrics for the given request/response
// pair and stamps the response with the cache mode header. For revalidated
// responses carrying a creation-date header, it also records the cache
// entry's age.
func collectMetrics(cacheMode CacheResponseMode, req *http.Request, resp *http.Response, tokenBudgetName string) {
	ghmetrics.CollectCacheRequestMetrics(string(cacheMode), req.URL.Path, req.Header.Get("User-Agent"), tokenBudgetName)
	if resp != nil {
		resp.Header.Set(CacheModeHeader, string(cacheMode))
		if cacheMode == ModeRevalidated && resp.Header.Get(cacheEntryCreationDateHeader) != "" {
			intVal, err := strconv.Atoi(resp.Header.Get(cacheEntryCreationDateHeader))
			if err != nil {
				logrus.WithError(err).WithField("header-value", resp.Header.Get(cacheEntryCreationDateHeader)).Warn("Failed to convert cacheEntryCreationDateHeader value to int")
			} else {
				ghmetrics.CollectCacheEntryAgeMetrics(float64(time.Now().Unix()-int64(intVal)), req.URL.Path, req.Header.Get("User-Agent"), tokenBudgetName)
			}
		}
	}
}