sigs.k8s.io/prow@v0.0.0-20240503223140-c5e374dc7eb1/cmd/ghproxy/ghproxy.go

/*
Copyright 2018 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
	"errors"
	"flag"
	"fmt"
	"net/http"
	"net/http/httputil"
	"net/url"
	"strconv"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/sirupsen/logrus"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"sigs.k8s.io/prow/pkg/pjutil/pprof"

	"sigs.k8s.io/prow/pkg/apptokenequalizer"
	"sigs.k8s.io/prow/pkg/config"
	"sigs.k8s.io/prow/pkg/diskutil"
	"sigs.k8s.io/prow/pkg/flagutil"
	"sigs.k8s.io/prow/pkg/ghcache"
	"sigs.k8s.io/prow/pkg/interrupts"
	"sigs.k8s.io/prow/pkg/logrusutil"
	"sigs.k8s.io/prow/pkg/metrics"
	"sigs.k8s.io/prow/pkg/pjutil"
)

var (
	diskFree = prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "ghcache_disk_free",
		Help: "Free gb on github-cache disk.",
	})
	diskUsed = prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "ghcache_disk_used",
		Help: "Used gb on github-cache disk.",
	})
	diskTotal = prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "ghcache_disk_total",
		Help: "Total gb on github-cache disk.",
	})
	diskInodeFree = prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "ghcache_disk_inode_free",
		Help: "Free inodes on github-cache disk.",
	})
	diskInodeUsed = prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "ghcache_disk_inode_used",
		Help: "Used inodes on github-cache disk.",
	})
	diskInodeTotal = prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "ghcache_disk_inode_total",
		Help: "Total inodes on github-cache disk.",
	})
)

func init() {
	prometheus.MustRegister(diskFree)
	prometheus.MustRegister(diskUsed)
	prometheus.MustRegister(diskTotal)
	prometheus.MustRegister(diskInodeFree)
	prometheus.MustRegister(diskInodeUsed)
	prometheus.MustRegister(diskInodeTotal)
}

// GitHub reverse proxy HTTP cache RoundTripper stack:
// v - <Client(s)>
// v ^ reverse proxy
// v ^ ghcache: downstreamTransport (coalescing, instrumentation)
// v ^ ghcache: httpcache layer
// v ^ ghcache: upstreamTransport (cache-control, instrumentation)
// v ^ apptokenequalizer: Make sure all clients get the same app installation token so they can share a cache
// v ^ http.DefaultTransport
// > ^ <Upstream>

type options struct {
	dir                                    string
	sizeGB                                 int
	diskCacheDisableAuthHeaderPartitioning bool

	redisAddress string

	port           int
	upstream       string
	upstreamParsed *url.URL

	maxConcurrency                  int
	requestThrottlingTime           uint
	requestThrottlingTimeV4         uint
	requestThrottlingTimeForGET     uint
	requestThrottlingMaxDelayTime   uint
	requestThrottlingMaxDelayTimeV4 uint

	// pushGateway fields are used to configure pushing prometheus metrics.
	pushGateway         string
	pushGatewayInterval time.Duration

	logLevel string

	serveMetrics bool

	instrumentationOptions flagutil.InstrumentationOptions

	timeout uint
}
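
// validate applies the configured log level, ensures the disk-cache flags are
// set consistently (both or neither of --cache-dir and --cache-sizeGB), and
// parses the upstream URL.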
func (o *options) validate() error {
	level, err := logrus.ParseLevel(o.logLevel)
	if err != nil {
		return fmt.Errorf("invalid log level specified: %w", err)
	}
	logrus.SetLevel(level)

	if (o.dir == "") != (o.sizeGB == 0) {
		return errors.New("--cache-dir and --cache-sizeGB must be specified together to enable the disk cache (otherwise a memory cache is used)")
	}
	upstreamURL, err := url.Parse(o.upstream)
	if err != nil {
		return fmt.Errorf("failed to parse upstream URL: %w", err)
	}
	o.upstreamParsed = upstreamURL
	return nil
}

func flagOptions() *options {
	o := &options{}
	flag.StringVar(&o.dir, "cache-dir", "", "Directory to cache to if using a disk cache.")
	flag.IntVar(&o.sizeGB, "cache-sizeGB", 0, "Cache size in GB per unique token if using a disk cache.")
	flag.BoolVar(&o.diskCacheDisableAuthHeaderPartitioning, "legacy-disable-disk-cache-partitions-by-auth-header", true, "Whether to disable partitioning a disk cache by auth header. Disabling this will start a new cache at $cache_dir/$sha256sum_of_authorization_header for each unique authorization header. Bigger setups are advised to manually warm this up from an existing cache. This option will be removed and set to `false` in the future.")
	flag.StringVar(&o.redisAddress, "redis-address", "", "Redis address if using a redis cache, e.g. localhost:6379.")
	flag.IntVar(&o.port, "port", 8888, "Port to listen on.")
	flag.StringVar(&o.upstream, "upstream", "https://api.github.com", "Scheme, host, and base path of reverse proxy upstream.")
	flag.IntVar(&o.maxConcurrency, "concurrency", 25, "Maximum number of concurrent in-flight requests to GitHub.")
	flag.UintVar(&o.requestThrottlingTime, "throttling-time-ms", 0, "Additional throttling mechanism which imposes time spacing between outgoing requests. Counted per organization. Has to be set together with --get-throttling-time-ms.")
	flag.UintVar(&o.requestThrottlingTimeV4, "throttling-time-v4-ms", 0, "Additional throttling mechanism which imposes time spacing between outgoing requests. Counted per organization. Overrides --throttling-time-ms setting for API v4.")
	flag.UintVar(&o.requestThrottlingTimeForGET, "get-throttling-time-ms", 0, "Additional throttling mechanism which imposes time spacing between outgoing GET requests. Counted per organization. Has to be set together with --throttling-time-ms.")
	flag.UintVar(&o.requestThrottlingMaxDelayTime, "throttling-max-delay-duration-seconds", 30, "Maximum delay for throttling in seconds. Requests will never be throttled for longer than this; used to avoid building a request backlog when the GitHub API has performance issues. Default is 30 seconds.")
	flag.UintVar(&o.requestThrottlingMaxDelayTimeV4, "throttling-max-delay-duration-v4-seconds", 30, "Maximum delay for throttling in seconds for API v4. Requests will never be throttled for longer than this; used to avoid building a request backlog when the GitHub API has performance issues. Default is 30 seconds.")
	flag.StringVar(&o.pushGateway, "push-gateway", "", "If specified, push prometheus metrics to this endpoint.")
	flag.DurationVar(&o.pushGatewayInterval, "push-gateway-interval", time.Minute, "Interval at which prometheus metrics are pushed.")
	flag.StringVar(&o.logLevel, "log-level", "debug", fmt.Sprintf("Log level is one of %v.", logrus.AllLevels))
	flag.BoolVar(&o.serveMetrics, "serve-metrics", false, "If true, serve prometheus metrics.")
	flag.UintVar(&o.timeout, "request-timeout", 30, "Request timeout which also applies to paged requests. Default is 30 seconds.")
	o.instrumentationOptions.AddFlags(flag.CommandLine)
	return o
}
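
// An illustrative invocation with a disk cache and metrics pushing; the path
// and endpoint values below are placeholders, not recommendations:
//
//	ghproxy \
//	  --cache-dir=/cache \
//	  --cache-sizeGB=20 \
//	  --push-gateway=pushgateway.example:9091 \
//	  --serve-metrics=true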
func main() {
	logrusutil.ComponentInit()

	o := flagOptions()
	flag.Parse()
	if err := o.validate(); err != nil {
		logrus.WithError(err).Fatal("Invalid arguments.")
	}

	if o.diskCacheDisableAuthHeaderPartitioning {
		logrus.Warningf("The deprecated `--legacy-disable-disk-cache-partitions-by-auth-header` flag's value is `true`. If you are a bigger Prow setup, you should copy your existing cache directory to the directory mentioned in the `%s` messages to warm up the partitioned-by-auth-header cache, then set the flag to false. If you are a smaller Prow setup or just started using ghproxy, you can unconditionally set it to `false`.", ghcache.LogMessageWithDiskPartitionFields)
	}

	if (o.requestThrottlingTime > 0 && o.requestThrottlingTimeForGET == 0) ||
		(o.requestThrottlingTime == 0 && o.requestThrottlingTimeForGET > 0) ||
		((o.requestThrottlingTime == 0 || o.requestThrottlingTimeForGET == 0) && o.requestThrottlingTimeV4 > 0) {
		logrus.Warningln("Flags `--throttling-time-ms` and `--get-throttling-time-ms` have to be set to non-zero values, otherwise the throttling feature will be disabled.")
	}

	pprof.Instrument(o.instrumentationOptions)
	defer interrupts.WaitForGracefulShutdown()
	metrics.ExposeMetrics("ghproxy", config.PushGateway{
		Endpoint: o.pushGateway,
		Interval: &metav1.Duration{
			Duration: o.pushGatewayInterval,
		},
		ServeMetrics: o.serveMetrics,
	}, o.instrumentationOptions.MetricsPort)

	proxy := proxy(o, http.DefaultTransport, time.Hour)
	server := &http.Server{Addr: ":" + strconv.Itoa(o.port), Handler: proxy}

	health := pjutil.NewHealthOnPort(o.instrumentationOptions.HealthPort)
	health.ServeReady()

	interrupts.ListenAndServe(server, time.Duration(o.timeout)*time.Second)
}

func proxy(o *options, upstreamTransport http.RoundTripper, diskCachePruneInterval time.Duration) http.Handler {
	var cache http.RoundTripper
	throttlingTimes := ghcache.NewRequestThrottlingTimes(o.requestThrottlingTime, o.requestThrottlingTimeV4, o.requestThrottlingTimeForGET, o.requestThrottlingMaxDelayTime, o.requestThrottlingMaxDelayTimeV4)
	if o.redisAddress != "" {
		cache = ghcache.NewRedisCache(apptokenequalizer.New(upstreamTransport), o.redisAddress, o.maxConcurrency, throttlingTimes)
	} else if o.dir == "" {
		cache = ghcache.NewMemCache(apptokenequalizer.New(upstreamTransport), o.maxConcurrency, throttlingTimes)
	} else {
		cache = ghcache.NewDiskCache(apptokenequalizer.New(upstreamTransport), o.dir, o.sizeGB, o.maxConcurrency, o.diskCacheDisableAuthHeaderPartitioning, diskCachePruneInterval, throttlingTimes)
		go diskMonitor(o.pushGatewayInterval, o.dir)
	}

	return newReverseProxy(o.upstreamParsed, cache, time.Duration(o.timeout)*time.Second)
}
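
// newReverseProxy returns a handler that rewrites incoming requests to target
// upstreamURL, sends them through the provided transport (the caching
// RoundTripper stack assembled in proxy), and bounds each request with
// http.TimeoutHandler.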
func newReverseProxy(upstreamURL *url.URL, transport http.RoundTripper, timeout time.Duration) http.Handler {
	proxy := httputil.NewSingleHostReverseProxy(upstreamURL)
	// Wrap the director to change the upstream request 'Host' header to the
	// target host.
	director := proxy.Director
	proxy.Director = func(req *http.Request) {
		director(req)
		req.Host = req.URL.Host
	}
	proxy.Transport = transport

	return http.TimeoutHandler(proxy, timeout, fmt.Sprintf("ghproxy timed out after %v", timeout))
}

// helper to update disk metrics (copied from greenhouse)
func diskMonitor(interval time.Duration, diskRoot string) {
	logger := logrus.WithField("sync-loop", "disk-monitor")
	ticker := time.NewTicker(interval)
	// Update the gauges once immediately, then on every tick.
	for ; true; <-ticker.C {
		logger.Info("tick")
		_, bytesFree, bytesUsed, _, inodesFree, inodesUsed, err := diskutil.GetDiskUsage(diskRoot)
		if err != nil {
			logger.WithError(err).Error("Failed to get disk metrics")
		} else {
			diskFree.Set(float64(bytesFree) / 1e9)
			diskUsed.Set(float64(bytesUsed) / 1e9)
			diskTotal.Set(float64(bytesFree+bytesUsed) / 1e9)
			diskInodeFree.Set(float64(inodesFree))
			diskInodeUsed.Set(float64(inodesUsed))
			diskInodeTotal.Set(float64(inodesFree + inodesUsed))
		}
	}
}