/*
Copyright 2018 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package ghcache

import (
	"bufio"
	"bytes"
	"net/http"
	"net/http/httputil"
	"strconv"
	"sync"
	"time"

	"github.com/sirupsen/logrus"

	"sigs.k8s.io/prow/pkg/github/ghmetrics"
)

// requestCoalescer allows concurrent requests for the same URI to share a
// single upstream request and response. The first request to arrive for a URI
// is executed (via "requestExecutor"). While it is in flight, any other
// request for the same URI Wait()s instead of going upstream. When the first
// request finishes (we receive a real response), its response is copied into
// every subscribed thread before they are all allowed to finish. The "cache"
// map is only our short-term memory of which request is the "first" one of
// its kind; entries live exactly as long as that first request is in flight.
type requestCoalescer struct {
	// The embedded mutex guards the "cache" map.
	sync.Mutex
	// cache maps a request URL (as a string) to the in-flight first request
	// for that URL.
	cache map[string]*firstRequest

	// requestExecutor is anything that can resolve a request by executing a
	// single HTTP transaction, returning a Response for the provided Request.
	// The coalescer uses this to talk to the actual proxied backend. Using an
	// interface here allows us to mock out a fake backend server's response to
	// the request.
	requestExecutor http.RoundTripper

	// hasher derives the token budget name used for metrics when a request
	// does not carry the TokenBudgetIdentifierHeader header.
	hasher ghmetrics.Hasher
}

// firstRequest is where we store the coalesced requests' actual response. It
// is named firstRequest because only the first one (which also creates the
// entry in the cache) is actually resolved by being processed over the
// network; all subsequent requests that match the first request's URL end up
// waiting for this first request to finish. After the first request is
// processed, the "resp" field is populated, and subsequent requests simply
// rebuild their own response from the same "resp" bytes. Note that if the
// first request fails, then all subsequent requests fail together.
type firstRequest struct {
	// Subscribers Wait() on this condition variable; the thread that created
	// the firstRequest Broadcast()s once resp/err have been stored. Its lock
	// (L) guards the fields below.
	*sync.Cond

	// Are there any threads that are "subscribed" to this first request's
	// response? Set to true by each waiting thread; the first request only
	// stores its result and broadcasts when this is true.
	subscribers bool
	// resp is the first request's response (including its body) in wire
	// format, as produced by httputil.DumpResponse.
	resp []byte
	// err is the error shared with subscribers: either the error returned by
	// the first request's round trip or the error from dumping its response.
	err error
}

// RoundTrip coalesces concurrent GET requests for the same URI by blocking
// the later requests until the first request returns and then sharing the
// response between all requests. Requests with any other method are passed
// straight to the requestExecutor. Cache metrics are collected for every
// request, coalesced or not.
//
// Notes: Deadlock shouldn't be possible because the map lock is always
// acquired before firstRequest lock if both locks are to be held and we
// never hold multiple firstRequest locks.
81 func (coalescer *requestCoalescer) RoundTrip(req *http.Request) (*http.Response, error) { 82 // Only coalesce GET requests 83 if req.Method != http.MethodGet { 84 resp, err := coalescer.requestExecutor.RoundTrip(req) 85 var tokenBudgetName string 86 if val := req.Header.Get(TokenBudgetIdentifierHeader); val != "" { 87 tokenBudgetName = val 88 } else { 89 tokenBudgetName = coalescer.hasher.Hash(req) 90 } 91 collectMetrics(ModeSkip, req, resp, tokenBudgetName) 92 return resp, err 93 } 94 95 var cacheMode = ModeError 96 resp, err := func() (*http.Response, error) { 97 key := req.URL.String() 98 coalescer.Lock() 99 firstReq, ok := coalescer.cache[key] 100 // Note that we cannot immediately Unlock() coalescer here just after 101 // the cache lookup, because that may result in multiple threads 102 // possibly becoming a "firstReq" creator (main) thread. This is why we 103 // only Unlock() coalescer __after__ creating the cache entry. 104 105 // Earlier request in flight. Wait for its response, which will be 106 // received by a different thread (specifically, the original thread 107 // that created the firstReq object --- let's call this the "main" 108 // thread for simplicity). 109 if ok { 110 // If the request that we're trying to process has a body, don't 111 // forget to close it. Normally if we're performing the HTTP 112 // roundtrip ourselves, we won't need to do this because the 113 // RoundTripper will do it on its own. However we'll never call 114 // RoundTrip() on this request ourselves because we're going to be 115 // lazy and just wait for the main thread to do it for us. So we 116 // need to close the body directly. See 117 // https://cs.opensource.google/go/go/+/refs/tags/go1.17.1:src/net/http/transport.go;l=510 118 // and 119 // https://cs.opensource.google/go/go/+/refs/tags/go1.17.1:src/net/http/request.go;drc=refs%2Ftags%2Fgo1.17.1;l=1408 120 // for an example. 
121 if req.Body != nil { 122 defer req.Body.Close() // Since we won't pass the request we must close it. 123 } 124 125 // Let the main thread know that there is at least one subscriber 126 // (us). We do this by incrementing the firstReq.subscribers 127 // variable. Note that we first grab the inner firstReq lock before 128 // unlocking the outer coalescer. This order is important as it 129 // guarantees that no other threads will delete the cache entry 130 // (firstReq) before we're done waiting for it. 131 // 132 // We need to unlock the coalescer so that other threads can read 133 // from it (and decide whether to wait or create a new cache entry). 134 // That is, the coalescer itself should never be blocked by 135 // subscribed threads. 136 firstReq.L.Lock() 137 coalescer.Unlock() 138 firstReq.subscribers = true 139 140 // The documentation for Wait() says: 141 // "Because c.L is not locked when Wait first resumes, the caller typically 142 // cannot assume that the condition is true when Wait returns. Instead, the 143 // caller should Wait in a loop." 144 // This does not apply to this use of Wait() because the condition we are 145 // waiting for remains true once it becomes true. This lets us avoid the 146 // normal check to see if the condition has switched back to false between 147 // the signal being sent and this thread acquiring the lock. 148 149 // Unlock firstReq.L variable (so that the thread that __did__ create 150 // the first request can actually process it). Suspend execution of 151 // this thread until that is done. 152 firstReq.Wait() 153 154 // Because firstReq.Wait() will lock firstReq.L before returning, 155 // release the lock now because we won't be modifying anything 156 // inside firstRequest. Anyway, if we're here it means that we've 157 // been woken by a Broadcast() by the main thread. 158 firstReq.L.Unlock() 159 160 if firstReq.err != nil { 161 // Don't log the error ourselves, because it will be logged once 162 // by the main thread. 
This avoids spamming the logs with the 163 // same error. 164 return nil, firstReq.err 165 } 166 167 // Copy in firstReq's response into our own response. We didn't have 168 // to process the request ourselves! Wasn't that easy? 169 resp, err := http.ReadResponse(bufio.NewReader(bytes.NewBuffer(firstReq.resp)), nil) 170 if err != nil { 171 logrus.WithField("cache-key", key).WithError(err).Error("Error loading response.") 172 return nil, err 173 } 174 175 cacheMode = ModeCoalesced 176 return resp, nil 177 } 178 179 // No earlier (first) request in flight yet. Create a new firstRequest 180 // object and process it ourselves. 181 firstReq = &firstRequest{Cond: sync.NewCond(&sync.Mutex{})} 182 coalescer.cache[key] = firstReq 183 184 // Unlock the coalescer so that it doesn't block on this particular 185 // request. This allows subsequent requests for the same URL to become 186 // subscribers to this main one. 187 coalescer.Unlock() 188 189 // Actually process the request and get a response. 190 resp, err := coalescer.requestExecutor.RoundTrip(req) 191 // Real response received. Remove this firstRequest from the cache first 192 // __before__ waking any subscribed threads to let them copy the 193 // response we got. This order is important. If delete the cache entry 194 // __after__ waking the subscribed threads, then the following race 195 // condition can happen: 196 // 197 // 1. firstReq creator thread wakes subscribed threads 198 // 2. subscribed threads begin copying data from firstReq struct 199 // 3. *NEW* subscribers get created, because the cached key is still there 200 // 4. cached key is finally deleted 201 // 5. firstReq creator thread from Step 1 dies 202 // 6. 
subscribed threads from Step 3 will wait forever 203 // (memory leak, not to mention request timeout for all of these) 204 // 205 // Deleting the cache key now also allows a new firstRequest{} object to 206 // be created (and the whole cycle repeated again) by another set of 207 // requests in flight, if any. 208 coalescer.Lock() 209 delete(coalescer.cache, key) 210 coalescer.Unlock() 211 212 // Write response data into firstReq for all subscribers to see. But 213 // only bother with writing into firstReq if we have subscribers at all 214 // (because otherwise no other thread will use it anyway). 215 firstReq.L.Lock() 216 if firstReq.subscribers { 217 if err != nil { 218 firstReq.resp, firstReq.err = nil, err 219 } else { 220 // Copy the response into firstReq.resp before letting 221 // subscribers know about it. 222 firstReq.resp, firstReq.err = httputil.DumpResponse(resp, true) 223 } 224 225 // Wake up all subscribed threads. They will all read firstReq.resp 226 // to construct their own (identical) HTTP Responses, based on the 227 // contents of firstReq. 228 firstReq.Broadcast() 229 } 230 firstReq.L.Unlock() 231 232 // The RoundTrip() encountered an error. Log it. 233 if err != nil { 234 logrus.WithField("cache-key", key).WithError(err).Warn("Error from cache transport layer.") 235 return nil, err 236 } 237 238 // Return a ModeMiss by default (that is, the response was not in the 239 // cache, so we had to proxy the request and cache the response). This 240 // is what cacheResponseMode() does, unless there are other modes we can 241 // glean from the response header, find it with cacheResponseMode. 
242 cacheMode = cacheResponseMode(resp.Header) 243 244 return resp, nil 245 }() 246 247 var tokenBudgetName string 248 if val := req.Header.Get(TokenBudgetIdentifierHeader); val != "" { 249 tokenBudgetName = val 250 } else { 251 tokenBudgetName = coalescer.hasher.Hash(req) 252 } 253 254 collectMetrics(cacheMode, req, resp, tokenBudgetName) 255 return resp, err 256 } 257 258 func collectMetrics(cacheMode CacheResponseMode, req *http.Request, resp *http.Response, tokenBudgetName string) { 259 ghmetrics.CollectCacheRequestMetrics(string(cacheMode), req.URL.Path, req.Header.Get("User-Agent"), tokenBudgetName) 260 if resp != nil { 261 resp.Header.Set(CacheModeHeader, string(cacheMode)) 262 if cacheMode == ModeRevalidated && resp.Header.Get(cacheEntryCreationDateHeader) != "" { 263 intVal, err := strconv.Atoi(resp.Header.Get(cacheEntryCreationDateHeader)) 264 if err != nil { 265 logrus.WithError(err).WithField("header-value", resp.Header.Get(cacheEntryCreationDateHeader)).Warn("Failed to convert cacheEntryCreationDateHeader value to int") 266 } else { 267 ghmetrics.CollectCacheEntryAgeMetrics(float64(time.Now().Unix()-int64(intVal)), req.URL.Path, req.Header.Get("User-Agent"), tokenBudgetName) 268 } 269 } 270 } 271 }