github.com/zppinho/prow@v0.0.0-20240510014325-1738badeb017/pkg/ghcache/ghcache.go (about) 1 /* 2 Copyright 2018 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 // Package ghcache implements an HTTP cache optimized for caching responses 18 // from the GitHub API (https://api.github.com). 19 // 20 // Specifically, it enforces a cache policy that revalidates every cache hit 21 // with a conditional request to upstream regardless of cache entry freshness 22 // because conditional requests for unchanged resources don't cost any API 23 // tokens!!! See: https://developer.github.com/v3/#conditional-requests 24 // 25 // It also provides request coalescing and prometheus instrumentation. 26 package ghcache 27 28 import ( 29 "context" 30 "encoding/json" 31 "fmt" 32 "net/http" 33 "os" 34 "path" 35 "path/filepath" 36 "strconv" 37 "strings" 38 "sync" 39 "time" 40 41 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 42 utilerrors "k8s.io/apimachinery/pkg/util/errors" 43 44 "github.com/cjwagner/httpcache" 45 "github.com/cjwagner/httpcache/diskcache" 46 rediscache "github.com/cjwagner/httpcache/redis" 47 "github.com/gomodule/redigo/redis" 48 "github.com/peterbourgon/diskv" 49 "github.com/prometheus/client_golang/prometheus" 50 "github.com/sirupsen/logrus" 51 "golang.org/x/sync/semaphore" 52 "sigs.k8s.io/prow/pkg/github/ghmetrics" 53 ) 54 55 type CacheResponseMode string 56 57 // Cache response modes describe how ghcache fulfilled a request. 58 const ( 59 CacheModeHeader = "X-Cache-Mode" 60 61 ModeError CacheResponseMode = "ERROR" // internal error handling request 62 ModeNoStore CacheResponseMode = "NO-STORE" // response not cacheable 63 ModeMiss CacheResponseMode = "MISS" // not in cache, request proxied and response cached. 64 ModeChanged CacheResponseMode = "CHANGED" // cache value invalid: resource changed, cache updated 65 ModeSkip CacheResponseMode = "SKIP" // cache was skipped, not applicable. e.g. POST request. 66 // The modes below are the happy cases in which the request is fulfilled for 67 // free (no API tokens used). 68 ModeCoalesced CacheResponseMode = "COALESCED" // coalesced request, this is a copied response 69 ModeRevalidated CacheResponseMode = "REVALIDATED" // cached value revalidated and returned 70 71 // cacheEntryCreationDateHeader contains the creation date of the cache entry 72 cacheEntryCreationDateHeader = "X-PROW-REQUEST-DATE" 73 74 // TokenBudgetIdentifierHeader is used to identify the token budget for 75 // which metrics should be recorded if set. If unset, the sha256sum of 76 // the Authorization header will be used. 77 TokenBudgetIdentifierHeader = "X-PROW-GHCACHE-TOKEN-BUDGET-IDENTIFIER" 78 79 // TokenExpiryAtHeader includes a date at which the passed token expires and all associated caches 80 // can be cleaned up. It's value must be in RFC3339 format. 81 TokenExpiryAtHeader = "X-PROW-TOKEN-EXPIRES-AT" 82 83 apiV3 = "v3" 84 apiV4 = "v4" 85 ) 86 87 // RequestThrottlingTimes keeps the information about throttling times per API and request methods 88 type RequestThrottlingTimes struct { 89 // throttlingTime is applied for all non-GET request methods for apiV3 and apiV4 90 throttlingTime uint 91 // throttlingTimeV4 if different than 0, it's applied for non-GET request methods for apiV4, instead of ThrottlingTime 92 throttlingTimeV4 uint 93 // throttlingTimeForGET is applied for all GET request methods for apiV3 and apiV4 94 throttlingTimeForGET uint 95 // maxDelayTime is applied when formed queue is too large, it allows to temporarily set max delay time provided by user instead of calculated value 96 maxDelayTime uint 97 // maxDelayTimeV4 is maxDelayTime for APIv4 98 maxDelayTimeV4 uint 99 } 100 101 func (rtt *RequestThrottlingTimes) isEnabled() bool { 102 return rtt.throttlingTime > 0 && rtt.throttlingTimeForGET > 0 103 } 104 105 func (rtt *RequestThrottlingTimes) getThrottlingTimeV4() uint { 106 if rtt.throttlingTimeV4 > 0 { 107 return rtt.throttlingTimeV4 108 } 109 return rtt.throttlingTime 110 } 111 112 // NewRequestThrottlingTimes creates a new RequestThrottlingTimes and returns it 113 func NewRequestThrottlingTimes(requestThrottlingTime, requestThrottlingTimeV4, requestThrottlingTimeForGET, requestThrottlingMaxDelayTime, requestThrottlingMaxDelayTimeV4 uint) RequestThrottlingTimes { 114 return RequestThrottlingTimes{ 115 throttlingTime: requestThrottlingTime, 116 throttlingTimeV4: requestThrottlingTimeV4, 117 throttlingTimeForGET: requestThrottlingTimeForGET, 118 maxDelayTime: requestThrottlingMaxDelayTime, 119 maxDelayTimeV4: requestThrottlingMaxDelayTimeV4, 120 } 121 } 122 123 func CacheModeIsFree(mode CacheResponseMode) bool { 124 switch mode { 125 case ModeCoalesced: 126 return true 127 case ModeRevalidated: 128 return true 129 case ModeError: 130 // In this case we did not successfully communicate with the GH API, so no 131 // token is used, but we also don't return a response, so ModeError won't 132 // ever be returned as a value of CacheModeHeader. 133 return true 134 } 135 return false 136 } 137 138 // outboundConcurrencyGauge provides the 'concurrent_outbound_requests' gauge that 139 // is global to the proxy. 140 var outboundConcurrencyGauge = prometheus.NewGauge(prometheus.GaugeOpts{ 141 Name: "concurrent_outbound_requests", 142 Help: "How many concurrent requests are in flight to GitHub servers.", 143 }) 144 145 // pendingOutboundConnectionsGauge provides the 'pending_outbound_requests' gauge that 146 // is global to the proxy. 147 var pendingOutboundConnectionsGauge = prometheus.NewGauge(prometheus.GaugeOpts{ 148 Name: "pending_outbound_requests", 149 Help: "How many pending requests are waiting to be sent to GitHub servers.", 150 }) 151 152 var cachePartitionsCounter = prometheus.NewCounterVec( 153 prometheus.CounterOpts{ 154 Name: "ghcache_cache_parititions", 155 Help: "Which cache partitions exist.", 156 }, 157 []string{"token_hash"}, 158 ) 159 160 func init() { 161 162 prometheus.MustRegister(outboundConcurrencyGauge) 163 prometheus.MustRegister(pendingOutboundConnectionsGauge) 164 prometheus.MustRegister(cachePartitionsCounter) 165 } 166 167 func cacheResponseMode(headers http.Header) CacheResponseMode { 168 if strings.Contains(headers.Get("Cache-Control"), "no-store") { 169 return ModeNoStore 170 } 171 if strings.Contains(headers.Get("Status"), "304 Not Modified") { 172 return ModeRevalidated 173 } 174 if headers.Get("X-Conditional-Request") != "" { 175 return ModeChanged 176 } 177 return ModeMiss 178 } 179 180 func newThrottlingTransport(maxConcurrency int, roundTripper http.RoundTripper, hasher ghmetrics.Hasher, throttlingTimes RequestThrottlingTimes) http.RoundTripper { 181 return &throttlingTransport{ 182 sem: semaphore.NewWeighted(int64(maxConcurrency)), 183 roundTripper: roundTripper, 184 timeThrottlingEnabled: throttlingTimes.isEnabled(), 185 hasher: hasher, 186 registryApiV3: newTokensRegistry(throttlingTimes.throttlingTime, throttlingTimes.throttlingTimeForGET, throttlingTimes.maxDelayTime), 187 registryApiV4: newTokensRegistry(throttlingTimes.getThrottlingTimeV4(), throttlingTimes.throttlingTimeForGET, throttlingTimes.maxDelayTimeV4), 188 } 189 } 190 191 func newTokensRegistry(requestThrottlingTime, requestThrottlingTimeForGET, maxDelayTime uint) tokensRegistry { 192 return tokensRegistry{ 193 lock: sync.Mutex{}, 194 tokens: map[string]tokenInfo{}, 195 throttlingTime: time.Millisecond * time.Duration(requestThrottlingTime), 196 throttlingTimeForGET: time.Millisecond * time.Duration(requestThrottlingTimeForGET), 197 maxDelayTime: time.Second * time.Duration(maxDelayTime), 198 } 199 } 200 201 // tokenInfo keeps the last request timestamp and information whether it was GET request 202 type tokenInfo struct { 203 getReq bool 204 timestamp time.Time 205 } 206 207 // tokenRegistry keeps the timestamp of last handled request per token budget (appId or hash) 208 type tokensRegistry struct { 209 lock sync.Mutex 210 tokens map[string]tokenInfo 211 throttlingTime time.Duration 212 throttlingTimeForGET time.Duration 213 maxDelayTime time.Duration 214 } 215 216 func (tr *tokensRegistry) getRequestWaitDuration(tokenBudgetName string, getReq bool) time.Duration { 217 var duration time.Duration 218 tr.lock.Lock() 219 defer tr.lock.Unlock() 220 toQueue := time.Now() 221 if t, exists := tr.tokens[tokenBudgetName]; exists { 222 toQueue, duration = tr.calculateRequestWaitDuration(t, toQueue, getReq) 223 } 224 tr.tokens[tokenBudgetName] = tokenInfo{getReq: getReq, timestamp: toQueue} 225 return duration 226 } 227 228 func (tr *tokensRegistry) calculateRequestWaitDuration(lastRequest tokenInfo, toQueue time.Time, getReq bool) (time.Time, time.Duration) { 229 throttlingTime := tr.throttlingTime 230 // Previous request also was GET => use GET throttling time as a base 231 if lastRequest.getReq && getReq { 232 throttlingTime = tr.throttlingTimeForGET 233 } 234 duration := toQueue.Sub(lastRequest.timestamp) 235 236 if toQueue.Before(lastRequest.timestamp) || toQueue.Equal(lastRequest.timestamp) { 237 // There is already queued request, queue next afterwards. 238 difference := throttlingTime 239 if getReq { 240 difference = tr.throttlingTimeForGET 241 } 242 future := lastRequest.timestamp.Add(difference) 243 duration = future.Sub(toQueue) 244 245 // Do not exceed max wait time to avoid creating a huge request backlog if the GitHub api has performance issues 246 if duration >= tr.maxDelayTime { 247 duration = tr.maxDelayTime 248 future = toQueue.Add(tr.maxDelayTime) 249 } 250 toQueue = future 251 } else if duration >= throttlingTime || (getReq && duration >= tr.throttlingTimeForGET) { 252 // There was no request for some time, no need to wait. 253 duration = 0 254 } else { 255 // There is a queued request, wait until the next throttling tick. 256 difference := throttlingTime - duration 257 if getReq && !lastRequest.getReq { 258 difference = tr.throttlingTimeForGET - duration 259 } 260 duration = difference 261 toQueue = toQueue.Add(duration) 262 } 263 return toQueue, duration 264 } 265 266 // throttlingTransport throttles outbound concurrency from the proxy and adds QPS limit (1 request per given time) if enabled 267 type throttlingTransport struct { 268 sem *semaphore.Weighted 269 roundTripper http.RoundTripper 270 hasher ghmetrics.Hasher 271 timeThrottlingEnabled bool 272 registryApiV3 tokensRegistry 273 registryApiV4 tokensRegistry 274 } 275 276 func (c *throttlingTransport) getTokenBudgetName(req *http.Request) string { 277 if val := req.Header.Get(TokenBudgetIdentifierHeader); val != "" { 278 return val 279 } 280 return c.hasher.Hash(req) 281 } 282 283 func (c *throttlingTransport) holdRequest(req *http.Request) { 284 tokenBudgetName := c.getTokenBudgetName(req) 285 getReq := req.Method == http.MethodGet 286 var duration time.Duration 287 if strings.HasPrefix(req.URL.Path, "graphql") || strings.HasPrefix(req.URL.Path, "/graphql") { 288 duration = c.registryApiV4.getRequestWaitDuration(tokenBudgetName, getReq) 289 ghmetrics.CollectGitHubRequestWaitDurationMetrics(tokenBudgetName, req.Method, apiV4, duration) 290 } else { 291 duration = c.registryApiV3.getRequestWaitDuration(tokenBudgetName, getReq) 292 ghmetrics.CollectGitHubRequestWaitDurationMetrics(tokenBudgetName, req.Method, apiV3, duration) 293 } 294 if duration > 0 { 295 time.Sleep(duration) 296 } 297 } 298 299 func (c *throttlingTransport) RoundTrip(req *http.Request) (*http.Response, error) { 300 pendingOutboundConnectionsGauge.Inc() 301 if c.timeThrottlingEnabled { 302 c.holdRequest(req) 303 } 304 305 if err := c.sem.Acquire(context.Background(), 1); err != nil { 306 logrus.WithField("cache-key", req.URL.String()).WithError(err).Error("Internal error acquiring semaphore.") 307 return nil, err 308 } 309 defer c.sem.Release(1) 310 pendingOutboundConnectionsGauge.Dec() 311 outboundConcurrencyGauge.Inc() 312 defer outboundConcurrencyGauge.Dec() 313 return c.roundTripper.RoundTrip(req) 314 } 315 316 // upstreamTransport changes response headers from upstream before they 317 // reach the cache layer in order to force the caching policy we require. 318 // 319 // By default github responds to PR requests with: 320 // 321 // Cache-Control: private, max-age=60, s-maxage=60 322 // 323 // Which means the httpcache would not consider anything stale for 60 seconds. 324 // However, we want to always revalidate cache entries using ETags and last 325 // modified times so this RoundTripper overrides response headers to: 326 // 327 // Cache-Control: no-cache 328 // 329 // This instructs the cache to store the response, but always consider it stale. 330 type upstreamTransport struct { 331 roundTripper http.RoundTripper 332 hasher ghmetrics.Hasher 333 } 334 335 func (u upstreamTransport) RoundTrip(req *http.Request) (*http.Response, error) { 336 etag := req.Header.Get("if-none-match") 337 var tokenBudgetName string 338 if val := req.Header.Get(TokenBudgetIdentifierHeader); val != "" { 339 tokenBudgetName = val 340 } else { 341 tokenBudgetName = u.hasher.Hash(req) 342 } 343 344 reqStartTime := time.Now() 345 // Don't modify request, just pass to roundTripper. 346 resp, err := u.roundTripper.RoundTrip(req) 347 if err != nil { 348 ghmetrics.CollectRequestTimeoutMetrics(tokenBudgetName, req.URL.Path, req.Header.Get("User-Agent"), reqStartTime, time.Now()) 349 logrus.WithField("cache-key", req.URL.String()).WithError(err).Warn("Error from upstream (GitHub).") 350 return nil, err 351 } 352 responseTime := time.Now() 353 roundTripTime := responseTime.Sub(reqStartTime) 354 355 if resp.StatusCode >= 400 { 356 // Don't store errors. They can't be revalidated to save API tokens. 357 resp.Header.Set("Cache-Control", "no-store") 358 } else { 359 resp.Header.Set("Cache-Control", "no-cache") 360 if resp.StatusCode != http.StatusNotModified { 361 // Used for metrics about the age of cached requests 362 resp.Header.Set(cacheEntryCreationDateHeader, strconv.Itoa(int(time.Now().Unix()))) 363 } 364 } 365 if etag != "" { 366 resp.Header.Set("X-Conditional-Request", etag) 367 } 368 369 apiVersion := apiV3 370 if strings.HasPrefix(req.URL.Path, "graphql") || strings.HasPrefix(req.URL.Path, "/graphql") { 371 resp.Header.Set("Cache-Control", "no-store") 372 apiVersion = apiV4 373 } 374 375 ghmetrics.CollectGitHubTokenMetrics(tokenBudgetName, apiVersion, resp.Header, reqStartTime, responseTime) 376 ghmetrics.CollectGitHubRequestMetrics(tokenBudgetName, req.URL.Path, strconv.Itoa(resp.StatusCode), req.Header.Get("User-Agent"), roundTripTime.Seconds()) 377 378 return resp, nil 379 } 380 381 const LogMessageWithDiskPartitionFields = "Not using a partitioned cache because legacyDisablePartitioningByAuthHeader is true" 382 383 // NewDiskCache creates a GitHub cache RoundTripper that is backed by a disk 384 // cache. 385 // It supports a partitioned cache. 386 func NewDiskCache(roundTripper http.RoundTripper, cacheDir string, cacheSizeGB, maxConcurrency int, legacyDisablePartitioningByAuthHeader bool, cachePruneInterval time.Duration, throttlingTimes RequestThrottlingTimes) http.RoundTripper { 387 if legacyDisablePartitioningByAuthHeader { 388 diskCache := diskcache.NewWithDiskv( 389 diskv.New(diskv.Options{ 390 BasePath: path.Join(cacheDir, "data"), 391 TempDir: path.Join(cacheDir, "temp"), 392 CacheSizeMax: uint64(cacheSizeGB) * uint64(1000000000), // convert G to B 393 })) 394 return NewFromCache(roundTripper, 395 func(partitionKey string, _ *time.Time) httpcache.Cache { 396 logrus.WithField("cache-base-path", path.Join(cacheDir, "data", partitionKey)). 397 WithField("cache-temp-path", path.Join(cacheDir, "temp", partitionKey)). 398 Warning(LogMessageWithDiskPartitionFields) 399 return diskCache 400 }, 401 maxConcurrency, 402 throttlingTimes, 403 ) 404 } 405 406 go func() { 407 for range time.NewTicker(cachePruneInterval).C { 408 Prune(cacheDir, time.Now) 409 } 410 }() 411 return NewFromCache(roundTripper, 412 func(partitionKey string, expiresAt *time.Time) httpcache.Cache { 413 basePath := path.Join(cacheDir, "data", partitionKey) 414 tempDir := path.Join(cacheDir, "temp", partitionKey) 415 if err := writecachePartitionMetadata(basePath, tempDir, expiresAt); err != nil { 416 logrus.WithError(err).Warn("Failed to write cache metadata file, pruning will not work") 417 } 418 return diskcache.NewWithDiskv( 419 diskv.New(diskv.Options{ 420 BasePath: basePath, 421 TempDir: tempDir, 422 CacheSizeMax: uint64(cacheSizeGB) * uint64(1000000000), // convert G to B 423 })) 424 }, 425 maxConcurrency, 426 throttlingTimes, 427 ) 428 } 429 430 func Prune(baseDir string, now func() time.Time) { 431 // All of this would be easier if the structure was base/partition/{data,temp} 432 // but because of compatibility we can not change it. 433 for _, dir := range []string{"data", "temp"} { 434 base := path.Join(baseDir, dir) 435 cachePartitionCandidates, err := os.ReadDir(base) 436 if err != nil { 437 logrus.WithError(err).Warn("os.ReadDir failed") 438 // no continue, os.ReadDir returns partial results if it encounters an error 439 } 440 for _, cachePartitionCandidate := range cachePartitionCandidates { 441 if !cachePartitionCandidate.IsDir() { 442 continue 443 } 444 metadataPath := path.Join(base, cachePartitionCandidate.Name(), cachePartitionMetadataFileName) 445 446 // Read optimistically and just ignore errors 447 raw, err := os.ReadFile(metadataPath) 448 if err != nil { 449 continue 450 } 451 var metadata cachePartitionMetadata 452 if err := json.Unmarshal(raw, &metadata); err != nil { 453 logrus.WithError(err).WithField("filepath", metadataPath).Error("failed to deserialize metadata file") 454 continue 455 } 456 if metadata.ExpiresAt.After(now()) { 457 continue 458 } 459 paritionPath := filepath.Dir(metadataPath) 460 logrus.WithField("path", paritionPath).WithField("expiresAt", metadata.ExpiresAt.String()).Info("Cleaning up expired cache parition") 461 if err := os.RemoveAll(paritionPath); err != nil { 462 logrus.WithError(err).WithField("path", paritionPath).Error("failed to delete expired cache parition") 463 } 464 } 465 } 466 } 467 468 func writecachePartitionMetadata(basePath, tempDir string, expiresAt *time.Time) error { 469 // No expiry header for the token was passed, likely it is a PAT which never expires. 470 if expiresAt == nil { 471 return nil 472 } 473 metadata := cachePartitionMetadata{ExpiresAt: metav1.Time{Time: *expiresAt}} 474 serialized, err := json.Marshal(metadata) 475 if err != nil { 476 return fmt.Errorf("failed to serialize: %w", err) 477 } 478 479 var errs []error 480 for _, destBase := range []string{basePath, tempDir} { 481 if err := os.MkdirAll(destBase, 0755); err != nil { 482 errs = append(errs, fmt.Errorf("failed to create dir %s: %w", destBase, err)) 483 } 484 dest := path.Join(destBase, cachePartitionMetadataFileName) 485 if err := os.WriteFile(dest, serialized, 0644); err != nil { 486 errs = append(errs, fmt.Errorf("failed to write %s: %w", dest, err)) 487 } 488 } 489 490 return utilerrors.NewAggregate(errs) 491 } 492 493 const cachePartitionMetadataFileName = ".cache_metadata.json" 494 495 type cachePartitionMetadata struct { 496 ExpiresAt metav1.Time `json:"expires_at"` 497 } 498 499 // NewMemCache creates a GitHub cache RoundTripper that is backed by a memory 500 // cache. 501 // It supports a partitioned cache. 502 func NewMemCache(roundTripper http.RoundTripper, maxConcurrency int, throttlingTimes RequestThrottlingTimes) http.RoundTripper { 503 return NewFromCache(roundTripper, 504 func(_ string, _ *time.Time) httpcache.Cache { return httpcache.NewMemoryCache() }, 505 maxConcurrency, 506 throttlingTimes) 507 } 508 509 // CachePartitionCreator creates a new cache partition using the given key 510 type CachePartitionCreator func(partitionKey string, expiresAt *time.Time) httpcache.Cache 511 512 // NewFromCache creates a GitHub cache RoundTripper that is backed by the 513 // specified httpcache.Cache implementation. 514 func NewFromCache(roundTripper http.RoundTripper, cache CachePartitionCreator, maxConcurrency int, throttlingTimes RequestThrottlingTimes) http.RoundTripper { 515 hasher := ghmetrics.NewCachingHasher() 516 return newPartitioningRoundTripper(func(partitionKey string, expiresAt *time.Time) http.RoundTripper { 517 cacheTransport := httpcache.NewTransport(cache(partitionKey, expiresAt)) 518 cacheTransport.Transport = newThrottlingTransport(maxConcurrency, upstreamTransport{roundTripper: roundTripper, hasher: hasher}, hasher, throttlingTimes) 519 return &requestCoalescer{ 520 cache: make(map[string]*firstRequest), 521 requestExecutor: cacheTransport, 522 hasher: hasher, 523 } 524 }) 525 } 526 527 // NewRedisCache creates a GitHub cache RoundTripper that is backed by a Redis 528 // cache. 529 // Important note: The redis implementation does not support partitioning the cache 530 // which means that requests to the same path from different tokens will invalidate 531 // each other. 532 func NewRedisCache(roundTripper http.RoundTripper, redisAddress string, maxConcurrency int, throttlingTimes RequestThrottlingTimes) http.RoundTripper { 533 conn, err := redis.Dial("tcp", redisAddress) 534 if err != nil { 535 logrus.WithError(err).Fatal("Error connecting to Redis") 536 } 537 redisCache := rediscache.NewWithClient(conn) 538 return NewFromCache(roundTripper, 539 func(_ string, _ *time.Time) httpcache.Cache { return redisCache }, 540 maxConcurrency, 541 throttlingTimes) 542 }