github.com/zppinho/prow@v0.0.0-20240510014325-1738badeb017/pkg/github/ghmetrics/ghmetrics.go (about)

     1  /*
     2  Copyright 2019 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package ghmetrics
    18  
    19  import (
    20  	"net/http"
    21  	"strconv"
    22  	"strings"
    23  	"sync"
    24  	"time"
    25  
    26  	"github.com/prometheus/client_golang/prometheus"
    27  	"github.com/sirupsen/logrus"
    28  )
    29  
// ghTokenUntilResetGaugeVec provides the 'github_token_reset' gauge, labelled
// by token hash, API version and rate-limit resource.
//
// CollectGitHubTokenMetrics sets it to the time remaining until the rate limit
// resets: the span from the request start time to the timestamp carried in the
// X-RateLimit-Reset header, expressed in nanoseconds.
var ghTokenUntilResetGaugeVec = prometheus.NewGaugeVec(
	prometheus.GaugeOpts{
		Name: "github_token_reset",
		Help: "Last reported GitHub token reset time.",
	},
	[]string{"token_hash", "api_version", "ratelimit_resource"},
)
    39  
// ghTokenUsageGaugeVec provides the 'github_token_usage' gauge, labelled by
// token hash, API version and rate-limit resource.
//
// CollectGitHubTokenMetrics sets it to the value parsed from the
// X-RateLimit-Remaining header, i.e. the remaining request quota.
//
// NOTE(review): the Help text contains the typo "requets"; it is exported
// verbatim on the metrics endpoint, so fix it deliberately rather than in
// passing.
var ghTokenUsageGaugeVec = prometheus.NewGaugeVec(
	prometheus.GaugeOpts{
		Name: "github_token_usage",
		Help: "How many GitHub token requets are remaining for the current hour.",
	},
	[]string{"token_hash", "api_version", "ratelimit_resource"},
)
    49  
// ghRequestDurationHistVec provides the 'github_request_duration' histogram
// that keeps track of the duration of GitHub requests, labelled by token hash,
// API path, response status and user agent.
//
// Observations are made by CollectGitHubRequestMetrics using the round-trip
// time handed in by its caller.
// NOTE(review): the bucket boundaries (5ms-10s) suggest the value is in
// seconds, but the unit is chosen by the caller — confirm.
var ghRequestDurationHistVec = prometheus.NewHistogramVec(
	prometheus.HistogramOpts{
		Name:    "github_request_duration",
		Help:    "GitHub request duration by API path.",
		Buckets: []float64{0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10},
	},
	[]string{"token_hash", "path", "status", "user_agent"},
)
    60  
// ghRequestWaitDurationHistVec provides the
// 'github_request_wait_duration_seconds' histogram that keeps track of how
// long requests waited before being sent to the GitHub API, labelled by token
// hash, request type and API.
//
// Observations are made by CollectGitHubRequestWaitDurationMetrics, in seconds.
var ghRequestWaitDurationHistVec = prometheus.NewHistogramVec(
	prometheus.HistogramOpts{
		Name:    "github_request_wait_duration_seconds",
		Help:    "GitHub request wait duration before sending to API in seconds",
		Buckets: []float64{0.1, 0.25, 0.5, 1, 2.5, 5, 7.5, 10, 15, 20, 25, 30, 45, 60, 90, 120, 150, 180},
	},
	[]string{"token_hash", "request_type", "api"},
)
    71  
// cacheCounter provides the 'ghcache_responses' counter vec that is indexed
// by the cache response mode, the API path, the user agent and the token hash.
//
// It is incremented by one on every call to CollectCacheRequestMetrics.
var cacheCounter = prometheus.NewCounterVec(
	prometheus.CounterOpts{
		Name: "ghcache_responses",
		Help: "How many cache responses of each cache response mode there are.",
	},
	[]string{"mode", "path", "user_agent", "token_hash"},
)
    81  
// timeoutDuration provides the 'github_request_timeouts' histogram that keeps
// track of the timeouts of GitHub requests, labelled by token hash, API path
// and user agent.
//
// CollectRequestTimeoutMetrics observes the number of seconds that elapsed
// between the request start time and the response time.
var timeoutDuration = prometheus.NewHistogramVec(
	prometheus.HistogramOpts{
		Name:    "github_request_timeouts",
		Help:    "GitHub request timeout by API path.",
		Buckets: []float64{45, 60, 90, 120, 300},
	},
	[]string{"token_hash", "path", "user_agent"},
)
    92  
// cacheEntryAge provides the 'ghcache_cache_entry_age_seconds' histogram that
// tells us about the age of responses that came from the cache, labelled by
// token hash, API path and user agent.
//
// CollectCacheEntryAgeMetrics observes the age value handed in by its caller;
// the metric name implies that value is expected to be in seconds.
var cacheEntryAge = prometheus.NewHistogramVec(
	prometheus.HistogramOpts{
		Name:    "ghcache_cache_entry_age_seconds",
		Help:    "The age of cache entries by API path.",
		Buckets: []float64{5, 900, 1800, 3600, 7200, 14400},
	},
	[]string{"token_hash", "path", "user_agent"},
)
   103  
// muxTokenUsage is held by CollectGitHubTokenMetrics while it compares an
// incoming response time against lastGitHubResponse and updates it.
var muxTokenUsage sync.Mutex

// lastGitHubResponse is the most recent response time accepted by
// CollectGitHubTokenMetrics; responses older than it are not published.
// It is a single value shared by every token hash, API version and resource.
var lastGitHubResponse time.Time
   106  
   107  func init() {
   108  	prometheus.MustRegister(ghTokenUntilResetGaugeVec)
   109  	prometheus.MustRegister(ghTokenUsageGaugeVec)
   110  	prometheus.MustRegister(ghRequestDurationHistVec)
   111  	prometheus.MustRegister(ghRequestWaitDurationHistVec)
   112  	prometheus.MustRegister(cacheCounter)
   113  	prometheus.MustRegister(timeoutDuration)
   114  	prometheus.MustRegister(cacheEntryAge)
   115  }
   116  
   117  // CollectGitHubTokenMetrics publishes the rate limits of the github api to
   118  // `github_token_usage` as well as `github_token_reset` on prometheus.
   119  func CollectGitHubTokenMetrics(tokenHash, apiVersion string, headers http.Header, reqStartTime, responseTime time.Time) {
   120  	remaining := headers.Get("X-RateLimit-Remaining")
   121  	if remaining == "" {
   122  		return
   123  	}
   124  	resource := headers.Get("X-RateLimit-Resource")
   125  	timeUntilReset := timestampStringToTime(headers.Get("X-RateLimit-Reset"))
   126  	durationUntilReset := timeUntilReset.Sub(reqStartTime)
   127  
   128  	remainingFloat, err := strconv.ParseFloat(remaining, 64)
   129  	if err != nil {
   130  		logrus.WithError(err).Infof("Couldn't convert number of remaining token requests into gauge value (float)")
   131  	}
   132  	if remainingFloat == 0 {
   133  		logrus.WithFields(logrus.Fields{
   134  			"header":     remaining,
   135  			"user-agent": headers.Get("User-Agent"),
   136  		}).Debug("Parsed GitHub header as indicating no remaining rate-limit.")
   137  	}
   138  
   139  	muxTokenUsage.Lock()
   140  	isAfter := lastGitHubResponse.After(responseTime)
   141  	if !isAfter {
   142  		lastGitHubResponse = responseTime
   143  	}
   144  	muxTokenUsage.Unlock()
   145  	if isAfter {
   146  		logrus.WithField("last-github-response", lastGitHubResponse).WithField("response-time", responseTime).Debug("Previously pushed metrics of a newer response, skipping old metrics")
   147  	} else {
   148  		ghTokenUntilResetGaugeVec.With(prometheus.Labels{"token_hash": tokenHash, "api_version": apiVersion, "ratelimit_resource": resource}).Set(float64(durationUntilReset.Nanoseconds()))
   149  		ghTokenUsageGaugeVec.With(prometheus.Labels{"token_hash": tokenHash, "api_version": apiVersion, "ratelimit_resource": resource}).Set(remainingFloat)
   150  	}
   151  }
   152  
   153  // CollectGitHubRequestMetrics publishes the number of requests by API path to
   154  // `github_requests` on prometheus.
   155  func CollectGitHubRequestMetrics(tokenHash, path, statusCode, userAgent string, roundTripTime float64) {
   156  	ghRequestDurationHistVec.With(prometheus.Labels{"token_hash": tokenHash, "path": simplifier.Simplify(path), "status": statusCode, "user_agent": userAgentWithoutVersion(userAgent)}).Observe(roundTripTime)
   157  }
   158  
   159  // timestampStringToTime takes a unix timestamp and returns a `time.Time`
   160  // from the given time.
   161  func timestampStringToTime(tstamp string) time.Time {
   162  	timestamp, err := strconv.ParseInt(tstamp, 10, 64)
   163  	if err != nil {
   164  		logrus.WithField("timestamp", tstamp).Info("Couldn't convert unix timestamp")
   165  	}
   166  	return time.Unix(timestamp, 0)
   167  }
   168  
   169  // userAgentWithouVersion formats a user agent without the version to reduce label cardinality
   170  func userAgentWithoutVersion(userAgent string) string {
   171  	if !strings.Contains(userAgent, "/") {
   172  		return userAgent
   173  	}
   174  	return strings.SplitN(userAgent, "/", 2)[0]
   175  }
   176  
   177  // CollectCacheRequestMetrics records a cache outcome for a specific path
   178  func CollectCacheRequestMetrics(mode, path, userAgent, tokenHash string) {
   179  	cacheCounter.With(prometheus.Labels{"mode": mode, "path": simplifier.Simplify(path), "user_agent": userAgentWithoutVersion(userAgent), "token_hash": tokenHash}).Inc()
   180  }
   181  
   182  func CollectCacheEntryAgeMetrics(age float64, path, userAgent, tokenHash string) {
   183  	cacheEntryAge.With(prometheus.Labels{"path": simplifier.Simplify(path), "user_agent": userAgentWithoutVersion(userAgent), "token_hash": tokenHash}).Observe(age)
   184  }
   185  
   186  // CollectRequestTimeoutMetrics publishes the duration of timed-out requests by
   187  // API path to 'github_request_timeouts' on prometheus.
   188  func CollectRequestTimeoutMetrics(tokenHash, path, userAgent string, reqStartTime, responseTime time.Time) {
   189  	timeoutDuration.With(prometheus.Labels{"token_hash": tokenHash, "path": simplifier.Simplify(path), "user_agent": userAgentWithoutVersion(userAgent)}).Observe(float64(responseTime.Sub(reqStartTime).Seconds()))
   190  }
   191  
   192  // CollectGitHubRequestWaitDurationMetrics publishes the wait duration of requests
   193  // before sending to respective GitHub API on prometheus.
   194  func CollectGitHubRequestWaitDurationMetrics(tokenHash, requestType, api string, duration time.Duration) {
   195  	ghRequestWaitDurationHistVec.With(prometheus.Labels{"token_hash": tokenHash, "request_type": requestType, "api": api}).Observe(duration.Seconds())
   196  }