github.com/galamsiva2020/kubernetes-heapster-monitoring@v0.0.0-20210823134957-3c1baa7c1e70/metrics/heapster.go (about) 1 // Copyright 2014 Google Inc. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 //go:generate ./hooks/run_extpoints.sh 16 17 package main 18 19 import ( 20 "crypto/tls" 21 "errors" 22 "fmt" 23 "net" 24 "net/http" 25 "net/url" 26 "os" 27 "runtime" 28 "strconv" 29 "strings" 30 "time" 31 32 "github.com/golang/glog" 33 "github.com/prometheus/client_golang/prometheus" 34 "github.com/spf13/pflag" 35 36 kube_api "k8s.io/api/core/v1" 37 "k8s.io/apimachinery/pkg/fields" 38 "k8s.io/apimachinery/pkg/util/wait" 39 "k8s.io/apiserver/pkg/server/healthz" 40 "k8s.io/apiserver/pkg/util/flag" 41 "k8s.io/apiserver/pkg/util/logs" 42 kube_client "k8s.io/client-go/kubernetes" 43 v1listers "k8s.io/client-go/listers/core/v1" 44 "k8s.io/client-go/tools/cache" 45 "k8s.io/heapster/common/flags" 46 kube_config "k8s.io/heapster/common/kubernetes" 47 "k8s.io/heapster/metrics/cmd/heapster-apiserver/app" 48 "k8s.io/heapster/metrics/core" 49 "k8s.io/heapster/metrics/manager" 50 "k8s.io/heapster/metrics/options" 51 "k8s.io/heapster/metrics/processors" 52 "k8s.io/heapster/metrics/sinks" 53 metricsink "k8s.io/heapster/metrics/sinks/metric" 54 "k8s.io/heapster/metrics/sources" 55 "k8s.io/heapster/metrics/util" 56 "k8s.io/heapster/version" 57 ) 58 59 func main() { 60 opt := options.NewHeapsterRunOptions() 61 opt.AddFlags(pflag.CommandLine) 62 63 flag.InitFlags() 64 65 if opt.Version { 66 fmt.Println(version.VersionInfo()) 67 os.Exit(0) 68 } 69 70 logs.InitLogs() 71 defer logs.FlushLogs() 72 73 labelCopier, err := util.NewLabelCopier(opt.LabelSeparator, opt.StoredLabels, opt.IgnoredLabels) 74 if err != nil { 75 glog.Fatalf("Failed to initialize label copier: %v", err) 76 } 77 78 setMaxProcs(opt) 79 glog.Infof(strings.Join(os.Args, " ")) 80 glog.Infof("Heapster version %v", version.HeapsterVersion) 81 if err := validateFlags(opt); err != nil { 82 glog.Fatal(err) 83 } 84 85 kubernetesUrl, err := getKubernetesAddress(opt.Sources) 86 if err != nil { 87 glog.Fatalf("Failed to get kubernetes address: %v", err) 88 } 89 sourceManager := createSourceManagerOrDie(opt.Sources) 90 sinkManager, metricSink, historicalSource := createAndInitSinksOrDie(opt.Sinks, opt.HistoricalSource, opt.SinkExportDataTimeout, opt.DisableMetricSink) 91 92 podLister, nodeLister := getListersOrDie(kubernetesUrl) 93 dataProcessors := createDataProcessorsOrDie(kubernetesUrl, podLister, labelCopier) 94 95 man, err := manager.NewManager(sourceManager, dataProcessors, sinkManager, 96 opt.MetricResolution, manager.DefaultScrapeOffset, manager.DefaultMaxParallelism) 97 if err != nil { 98 glog.Fatalf("Failed to create main manager: %v", err) 99 } 100 man.Start() 101 102 if opt.EnableAPIServer { 103 // Run API server in a separate goroutine 104 createAndRunAPIServer(opt, metricSink, nodeLister, podLister) 105 } 106 107 mux := http.NewServeMux() 108 promHandler := prometheus.Handler() 109 handler := setupHandlers(metricSink, podLister, nodeLister, historicalSource, opt.DisableMetricExport) 110 healthz.InstallHandler(mux, healthzChecker(metricSink)) 111 112 addr := net.JoinHostPort(opt.Ip, strconv.Itoa(opt.Port)) 113 glog.Infof("Starting heapster on port %d", opt.Port) 114 115 if len(opt.TLSCertFile) > 0 && len(opt.TLSKeyFile) > 0 { 116 startSecureServing(opt, handler, promHandler, mux, addr) 117 } else { 118 mux.Handle("/", handler) 119 mux.Handle("/metrics", promHandler) 120 121 glog.Fatal(http.ListenAndServe(addr, mux)) 122 } 123 } 124 func createAndRunAPIServer(opt *options.HeapsterRunOptions, metricSink *metricsink.MetricSink, 125 nodeLister v1listers.NodeLister, podLister v1listers.PodLister) { 126 127 server, err := app.NewHeapsterApiServer(opt, metricSink, nodeLister, podLister) 128 if err != nil { 129 glog.Errorf("Could not create the API server: %v", err) 130 return 131 } 132 133 server.AddHealthzChecks(healthzChecker(metricSink)) 134 135 runApiServer := func(s *app.HeapsterAPIServer) { 136 if err := s.RunServer(); err != nil { 137 fmt.Fprintf(os.Stderr, "%v\n", err) 138 os.Exit(1) 139 } 140 } 141 glog.Infof("Starting Heapster API server...") 142 go runApiServer(server) 143 } 144 145 func startSecureServing(opt *options.HeapsterRunOptions, handler http.Handler, promHandler http.Handler, 146 mux *http.ServeMux, address string) { 147 148 if len(opt.TLSClientCAFile) > 0 { 149 authPprofHandler, err := newAuthHandler(opt, handler) 150 if err != nil { 151 glog.Fatalf("Failed to create authorized pprof handler: %v", err) 152 } 153 handler = authPprofHandler 154 155 authPromHandler, err := newAuthHandler(opt, promHandler) 156 if err != nil { 157 glog.Fatalf("Failed to create authorized prometheus handler: %v", err) 158 } 159 promHandler = authPromHandler 160 } 161 mux.Handle("/", handler) 162 mux.Handle("/metrics", promHandler) 163 164 // If allowed users is set, then we need to enable Client Authentication 165 if len(opt.AllowedUsers) > 0 { 166 server := &http.Server{ 167 Addr: address, 168 Handler: mux, 169 TLSConfig: &tls.Config{ClientAuth: tls.RequestClientCert}, 170 } 171 glog.Fatal(server.ListenAndServeTLS(opt.TLSCertFile, opt.TLSKeyFile)) 172 } else { 173 glog.Fatal(http.ListenAndServeTLS(address, opt.TLSCertFile, opt.TLSKeyFile, mux)) 174 } 175 } 176 177 func createSourceManagerOrDie(src flags.Uris) core.MetricsSource { 178 if len(src) != 1 { 179 glog.Fatal("Wrong number of sources specified") 180 } 181 sourceFactory := sources.NewSourceFactory() 182 sourceProvider, err := sourceFactory.BuildAll(src) 183 if err != nil { 184 glog.Fatalf("Failed to create source provide: %v", err) 185 } 186 sourceManager, err := sources.NewSourceManager(sourceProvider, sources.DefaultMetricsScrapeTimeout) 187 if err != nil { 188 glog.Fatalf("Failed to create source manager: %v", err) 189 } 190 return sourceManager 191 } 192 193 func createAndInitSinksOrDie(sinkAddresses flags.Uris, historicalSource string, sinkExportDataTimeout time.Duration, disableMetricSink bool) (core.DataSink, *metricsink.MetricSink, core.HistoricalSource) { 194 sinksFactory := sinks.NewSinkFactory() 195 metricSink, sinkList, histSource := sinksFactory.BuildAll(sinkAddresses, historicalSource, disableMetricSink) 196 if metricSink == nil && !disableMetricSink { 197 glog.Fatal("Failed to create metric sink") 198 } 199 if histSource == nil && len(historicalSource) > 0 { 200 glog.Fatal("Failed to use a sink as a historical metrics source") 201 } 202 for _, sink := range sinkList { 203 glog.Infof("Starting with %s", sink.Name()) 204 } 205 sinkManager, err := sinks.NewDataSinkManager(sinkList, sinkExportDataTimeout, sinks.DefaultSinkStopTimeout) 206 if err != nil { 207 glog.Fatalf("Failed to create sink manager: %v", err) 208 } 209 return sinkManager, metricSink, histSource 210 } 211 212 func getListersOrDie(kubernetesUrl *url.URL) (v1listers.PodLister, v1listers.NodeLister) { 213 kubeClient := createKubeClientOrDie(kubernetesUrl) 214 215 podLister, err := getPodLister(kubeClient) 216 if err != nil { 217 glog.Fatalf("Failed to create podLister: %v", err) 218 } 219 nodeLister, _, err := util.GetNodeLister(kubeClient) 220 if err != nil { 221 glog.Fatalf("Failed to create nodeLister: %v", err) 222 } 223 return podLister, nodeLister 224 } 225 226 func createKubeClientOrDie(kubernetesUrl *url.URL) *kube_client.Clientset { 227 kubeConfig, err := kube_config.GetKubeClientConfig(kubernetesUrl) 228 if err != nil { 229 glog.Fatalf("Failed to get client config: %v", err) 230 } 231 return kube_client.NewForConfigOrDie(kubeConfig) 232 } 233 234 func createDataProcessorsOrDie(kubernetesUrl *url.URL, podLister v1listers.PodLister, labelCopier *util.LabelCopier) []core.DataProcessor { 235 dataProcessors := []core.DataProcessor{ 236 // Convert cumulative to rate 237 processors.NewRateCalculator(core.RateMetricsMapping), 238 } 239 240 podBasedEnricher, err := processors.NewPodBasedEnricher(podLister, labelCopier) 241 if err != nil { 242 glog.Fatalf("Failed to create PodBasedEnricher: %v", err) 243 } 244 dataProcessors = append(dataProcessors, podBasedEnricher) 245 246 namespaceBasedEnricher, err := processors.NewNamespaceBasedEnricher(kubernetesUrl) 247 if err != nil { 248 glog.Fatalf("Failed to create NamespaceBasedEnricher: %v", err) 249 } 250 dataProcessors = append(dataProcessors, namespaceBasedEnricher) 251 252 // aggregators 253 metricsToAggregate := []string{ 254 core.MetricCpuUsageRate.Name, 255 core.MetricMemoryUsage.Name, 256 core.MetricCpuRequest.Name, 257 core.MetricCpuLimit.Name, 258 core.MetricMemoryRequest.Name, 259 core.MetricMemoryLimit.Name, 260 } 261 262 metricsToAggregateForNode := []string{ 263 core.MetricCpuRequest.Name, 264 core.MetricCpuLimit.Name, 265 core.MetricMemoryRequest.Name, 266 core.MetricMemoryLimit.Name, 267 core.MetricEphemeralStorageRequest.Name, 268 core.MetricEphemeralStorageLimit.Name, 269 } 270 271 dataProcessors = append(dataProcessors, 272 processors.NewPodAggregator(), 273 &processors.NamespaceAggregator{ 274 MetricsToAggregate: metricsToAggregate, 275 }, 276 &processors.NodeAggregator{ 277 MetricsToAggregate: metricsToAggregateForNode, 278 }, 279 &processors.ClusterAggregator{ 280 MetricsToAggregate: metricsToAggregate, 281 }) 282 283 nodeAutoscalingEnricher, err := processors.NewNodeAutoscalingEnricher(kubernetesUrl, labelCopier) 284 if err != nil { 285 glog.Fatalf("Failed to create NodeAutoscalingEnricher: %v", err) 286 } 287 dataProcessors = append(dataProcessors, nodeAutoscalingEnricher) 288 return dataProcessors 289 } 290 291 const ( 292 minMetricsCount = 1 293 maxMetricsDelay = 3 * time.Minute 294 ) 295 296 func healthzChecker(metricSink *metricsink.MetricSink) healthz.HealthzChecker { 297 return healthz.NamedCheck("healthz", func(r *http.Request) error { 298 batch := metricSink.GetLatestDataBatch() 299 if batch == nil { 300 return errors.New("could not get the latest data batch") 301 } 302 if time.Since(batch.Timestamp) > maxMetricsDelay { 303 message := fmt.Sprintf("No current data batch available (latest: %s).", batch.Timestamp.String()) 304 glog.Warningf(message) 305 return errors.New(message) 306 } 307 if len(batch.MetricSets) < minMetricsCount { 308 message := fmt.Sprintf("Not enough metrics found in the latest data batch: %d (expected min. %d) %s", len(batch.MetricSets), minMetricsCount, batch.Timestamp.String()) 309 glog.Warningf(message) 310 return errors.New(message) 311 } 312 return nil 313 }) 314 } 315 316 // Gets the address of the kubernetes source from the list of source URIs. 317 // Possible kubernetes sources are: 'kubernetes' and 'kubernetes.summary_api' 318 func getKubernetesAddress(args flags.Uris) (*url.URL, error) { 319 for _, uri := range args { 320 if strings.SplitN(uri.Key, ".", 2)[0] == "kubernetes" { 321 return &uri.Val, nil 322 } 323 } 324 return nil, fmt.Errorf("No kubernetes source found.") 325 } 326 327 func getPodLister(kubeClient *kube_client.Clientset) (v1listers.PodLister, error) { 328 lw := cache.NewListWatchFromClient(kubeClient.CoreV1().RESTClient(), "pods", kube_api.NamespaceAll, fields.Everything()) 329 store := cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}) 330 podLister := v1listers.NewPodLister(store) 331 reflector := cache.NewReflector(lw, &kube_api.Pod{}, store, time.Hour) 332 go reflector.Run(wait.NeverStop) 333 return podLister, nil 334 } 335 336 func validateFlags(opt *options.HeapsterRunOptions) error { 337 if opt.MetricResolution < 5*time.Second { 338 return fmt.Errorf("metric resolution should not be less than 5 seconds - %d", opt.MetricResolution) 339 } 340 if (len(opt.TLSCertFile) > 0 && len(opt.TLSKeyFile) == 0) || (len(opt.TLSCertFile) == 0 && len(opt.TLSKeyFile) > 0) { 341 return fmt.Errorf("both TLS certificate & key are required to enable TLS serving") 342 } 343 if len(opt.TLSClientCAFile) > 0 && len(opt.TLSCertFile) == 0 { 344 return fmt.Errorf("client cert authentication requires TLS certificate & key") 345 } 346 return nil 347 } 348 349 func setMaxProcs(opt *options.HeapsterRunOptions) { 350 // Allow as many threads as we have cores unless the user specified a value. 351 var numProcs int 352 if opt.MaxProcs < 1 { 353 numProcs = runtime.NumCPU() 354 } else { 355 numProcs = opt.MaxProcs 356 } 357 runtime.GOMAXPROCS(numProcs) 358 359 // Check if the setting was successful. 360 actualNumProcs := runtime.GOMAXPROCS(0) 361 if actualNumProcs != numProcs { 362 glog.Warningf("Specified max procs of %d but using %d", numProcs, actualNumProcs) 363 } 364 }