github.com/galamsiva2020/kubernetes-heapster-monitoring@v0.0.0-20210823134957-3c1baa7c1e70/metrics/heapster.go (about)

     1  // Copyright 2014 Google Inc. All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  //go:generate ./hooks/run_extpoints.sh
    16  
    17  package main
    18  
    19  import (
    20  	"crypto/tls"
    21  	"errors"
    22  	"fmt"
    23  	"net"
    24  	"net/http"
    25  	"net/url"
    26  	"os"
    27  	"runtime"
    28  	"strconv"
    29  	"strings"
    30  	"time"
    31  
    32  	"github.com/golang/glog"
    33  	"github.com/prometheus/client_golang/prometheus"
    34  	"github.com/spf13/pflag"
    35  
    36  	kube_api "k8s.io/api/core/v1"
    37  	"k8s.io/apimachinery/pkg/fields"
    38  	"k8s.io/apimachinery/pkg/util/wait"
    39  	"k8s.io/apiserver/pkg/server/healthz"
    40  	"k8s.io/apiserver/pkg/util/flag"
    41  	"k8s.io/apiserver/pkg/util/logs"
    42  	kube_client "k8s.io/client-go/kubernetes"
    43  	v1listers "k8s.io/client-go/listers/core/v1"
    44  	"k8s.io/client-go/tools/cache"
    45  	"k8s.io/heapster/common/flags"
    46  	kube_config "k8s.io/heapster/common/kubernetes"
    47  	"k8s.io/heapster/metrics/cmd/heapster-apiserver/app"
    48  	"k8s.io/heapster/metrics/core"
    49  	"k8s.io/heapster/metrics/manager"
    50  	"k8s.io/heapster/metrics/options"
    51  	"k8s.io/heapster/metrics/processors"
    52  	"k8s.io/heapster/metrics/sinks"
    53  	metricsink "k8s.io/heapster/metrics/sinks/metric"
    54  	"k8s.io/heapster/metrics/sources"
    55  	"k8s.io/heapster/metrics/util"
    56  	"k8s.io/heapster/version"
    57  )
    58  
// main is the heapster entry point. It parses flags, wires up the metrics
// pipeline (source manager -> data processors -> sink manager), optionally
// starts the Heapster API server, and then serves the HTTP endpoints
// (metrics export, prometheus /metrics, /healthz) until the process exits.
func main() {
	opt := options.NewHeapsterRunOptions()
	opt.AddFlags(pflag.CommandLine)

	flag.InitFlags()

	// --version short-circuits before any pipeline setup.
	if opt.Version {
		fmt.Println(version.VersionInfo())
		os.Exit(0)
	}

	logs.InitLogs()
	defer logs.FlushLogs()

	// labelCopier controls which pod labels are copied onto exported metrics.
	labelCopier, err := util.NewLabelCopier(opt.LabelSeparator, opt.StoredLabels, opt.IgnoredLabels)
	if err != nil {
		glog.Fatalf("Failed to initialize label copier: %v", err)
	}

	setMaxProcs(opt)
	glog.Infof(strings.Join(os.Args, " "))
	glog.Infof("Heapster version %v", version.HeapsterVersion)
	if err := validateFlags(opt); err != nil {
		glog.Fatal(err)
	}

	// The kubernetes source URI doubles as the API server address used by
	// listers, enrichers and the kube client below.
	kubernetesUrl, err := getKubernetesAddress(opt.Sources)
	if err != nil {
		glog.Fatalf("Failed to get kubernetes address: %v", err)
	}
	sourceManager := createSourceManagerOrDie(opt.Sources)
	sinkManager, metricSink, historicalSource := createAndInitSinksOrDie(opt.Sinks, opt.HistoricalSource, opt.SinkExportDataTimeout, opt.DisableMetricSink)

	podLister, nodeLister := getListersOrDie(kubernetesUrl)
	dataProcessors := createDataProcessorsOrDie(kubernetesUrl, podLister, labelCopier)

	// The manager drives the scrape loop: every MetricResolution it pulls from
	// the source manager, runs the processors, and pushes to the sinks.
	man, err := manager.NewManager(sourceManager, dataProcessors, sinkManager,
		opt.MetricResolution, manager.DefaultScrapeOffset, manager.DefaultMaxParallelism)
	if err != nil {
		glog.Fatalf("Failed to create main manager: %v", err)
	}
	man.Start()

	if opt.EnableAPIServer {
		// Run API server in a separate goroutine
		createAndRunAPIServer(opt, metricSink, nodeLister, podLister)
	}

	mux := http.NewServeMux()
	promHandler := prometheus.Handler()
	handler := setupHandlers(metricSink, podLister, nodeLister, historicalSource, opt.DisableMetricExport)
	healthz.InstallHandler(mux, healthzChecker(metricSink))

	addr := net.JoinHostPort(opt.Ip, strconv.Itoa(opt.Port))
	glog.Infof("Starting heapster on port %d", opt.Port)

	// Serve TLS only when both cert and key were supplied (validateFlags has
	// already rejected a cert without a key and vice versa). Both branches
	// block forever; ListenAndServe only returns on error, which is fatal.
	if len(opt.TLSCertFile) > 0 && len(opt.TLSKeyFile) > 0 {
		startSecureServing(opt, handler, promHandler, mux, addr)
	} else {
		mux.Handle("/", handler)
		mux.Handle("/metrics", promHandler)

		glog.Fatal(http.ListenAndServe(addr, mux))
	}
}
   124  func createAndRunAPIServer(opt *options.HeapsterRunOptions, metricSink *metricsink.MetricSink,
   125  	nodeLister v1listers.NodeLister, podLister v1listers.PodLister) {
   126  
   127  	server, err := app.NewHeapsterApiServer(opt, metricSink, nodeLister, podLister)
   128  	if err != nil {
   129  		glog.Errorf("Could not create the API server: %v", err)
   130  		return
   131  	}
   132  
   133  	server.AddHealthzChecks(healthzChecker(metricSink))
   134  
   135  	runApiServer := func(s *app.HeapsterAPIServer) {
   136  		if err := s.RunServer(); err != nil {
   137  			fmt.Fprintf(os.Stderr, "%v\n", err)
   138  			os.Exit(1)
   139  		}
   140  	}
   141  	glog.Infof("Starting Heapster API server...")
   142  	go runApiServer(server)
   143  }
   144  
   145  func startSecureServing(opt *options.HeapsterRunOptions, handler http.Handler, promHandler http.Handler,
   146  	mux *http.ServeMux, address string) {
   147  
   148  	if len(opt.TLSClientCAFile) > 0 {
   149  		authPprofHandler, err := newAuthHandler(opt, handler)
   150  		if err != nil {
   151  			glog.Fatalf("Failed to create authorized pprof handler: %v", err)
   152  		}
   153  		handler = authPprofHandler
   154  
   155  		authPromHandler, err := newAuthHandler(opt, promHandler)
   156  		if err != nil {
   157  			glog.Fatalf("Failed to create authorized prometheus handler: %v", err)
   158  		}
   159  		promHandler = authPromHandler
   160  	}
   161  	mux.Handle("/", handler)
   162  	mux.Handle("/metrics", promHandler)
   163  
   164  	// If allowed users is set, then we need to enable Client Authentication
   165  	if len(opt.AllowedUsers) > 0 {
   166  		server := &http.Server{
   167  			Addr:      address,
   168  			Handler:   mux,
   169  			TLSConfig: &tls.Config{ClientAuth: tls.RequestClientCert},
   170  		}
   171  		glog.Fatal(server.ListenAndServeTLS(opt.TLSCertFile, opt.TLSKeyFile))
   172  	} else {
   173  		glog.Fatal(http.ListenAndServeTLS(address, opt.TLSCertFile, opt.TLSKeyFile, mux))
   174  	}
   175  }
   176  
   177  func createSourceManagerOrDie(src flags.Uris) core.MetricsSource {
   178  	if len(src) != 1 {
   179  		glog.Fatal("Wrong number of sources specified")
   180  	}
   181  	sourceFactory := sources.NewSourceFactory()
   182  	sourceProvider, err := sourceFactory.BuildAll(src)
   183  	if err != nil {
   184  		glog.Fatalf("Failed to create source provide: %v", err)
   185  	}
   186  	sourceManager, err := sources.NewSourceManager(sourceProvider, sources.DefaultMetricsScrapeTimeout)
   187  	if err != nil {
   188  		glog.Fatalf("Failed to create source manager: %v", err)
   189  	}
   190  	return sourceManager
   191  }
   192  
   193  func createAndInitSinksOrDie(sinkAddresses flags.Uris, historicalSource string, sinkExportDataTimeout time.Duration, disableMetricSink bool) (core.DataSink, *metricsink.MetricSink, core.HistoricalSource) {
   194  	sinksFactory := sinks.NewSinkFactory()
   195  	metricSink, sinkList, histSource := sinksFactory.BuildAll(sinkAddresses, historicalSource, disableMetricSink)
   196  	if metricSink == nil && !disableMetricSink {
   197  		glog.Fatal("Failed to create metric sink")
   198  	}
   199  	if histSource == nil && len(historicalSource) > 0 {
   200  		glog.Fatal("Failed to use a sink as a historical metrics source")
   201  	}
   202  	for _, sink := range sinkList {
   203  		glog.Infof("Starting with %s", sink.Name())
   204  	}
   205  	sinkManager, err := sinks.NewDataSinkManager(sinkList, sinkExportDataTimeout, sinks.DefaultSinkStopTimeout)
   206  	if err != nil {
   207  		glog.Fatalf("Failed to create sink manager: %v", err)
   208  	}
   209  	return sinkManager, metricSink, histSource
   210  }
   211  
   212  func getListersOrDie(kubernetesUrl *url.URL) (v1listers.PodLister, v1listers.NodeLister) {
   213  	kubeClient := createKubeClientOrDie(kubernetesUrl)
   214  
   215  	podLister, err := getPodLister(kubeClient)
   216  	if err != nil {
   217  		glog.Fatalf("Failed to create podLister: %v", err)
   218  	}
   219  	nodeLister, _, err := util.GetNodeLister(kubeClient)
   220  	if err != nil {
   221  		glog.Fatalf("Failed to create nodeLister: %v", err)
   222  	}
   223  	return podLister, nodeLister
   224  }
   225  
   226  func createKubeClientOrDie(kubernetesUrl *url.URL) *kube_client.Clientset {
   227  	kubeConfig, err := kube_config.GetKubeClientConfig(kubernetesUrl)
   228  	if err != nil {
   229  		glog.Fatalf("Failed to get client config: %v", err)
   230  	}
   231  	return kube_client.NewForConfigOrDie(kubeConfig)
   232  }
   233  
   234  func createDataProcessorsOrDie(kubernetesUrl *url.URL, podLister v1listers.PodLister, labelCopier *util.LabelCopier) []core.DataProcessor {
   235  	dataProcessors := []core.DataProcessor{
   236  		// Convert cumulative to rate
   237  		processors.NewRateCalculator(core.RateMetricsMapping),
   238  	}
   239  
   240  	podBasedEnricher, err := processors.NewPodBasedEnricher(podLister, labelCopier)
   241  	if err != nil {
   242  		glog.Fatalf("Failed to create PodBasedEnricher: %v", err)
   243  	}
   244  	dataProcessors = append(dataProcessors, podBasedEnricher)
   245  
   246  	namespaceBasedEnricher, err := processors.NewNamespaceBasedEnricher(kubernetesUrl)
   247  	if err != nil {
   248  		glog.Fatalf("Failed to create NamespaceBasedEnricher: %v", err)
   249  	}
   250  	dataProcessors = append(dataProcessors, namespaceBasedEnricher)
   251  
   252  	// aggregators
   253  	metricsToAggregate := []string{
   254  		core.MetricCpuUsageRate.Name,
   255  		core.MetricMemoryUsage.Name,
   256  		core.MetricCpuRequest.Name,
   257  		core.MetricCpuLimit.Name,
   258  		core.MetricMemoryRequest.Name,
   259  		core.MetricMemoryLimit.Name,
   260  	}
   261  
   262  	metricsToAggregateForNode := []string{
   263  		core.MetricCpuRequest.Name,
   264  		core.MetricCpuLimit.Name,
   265  		core.MetricMemoryRequest.Name,
   266  		core.MetricMemoryLimit.Name,
   267  		core.MetricEphemeralStorageRequest.Name,
   268  		core.MetricEphemeralStorageLimit.Name,
   269  	}
   270  
   271  	dataProcessors = append(dataProcessors,
   272  		processors.NewPodAggregator(),
   273  		&processors.NamespaceAggregator{
   274  			MetricsToAggregate: metricsToAggregate,
   275  		},
   276  		&processors.NodeAggregator{
   277  			MetricsToAggregate: metricsToAggregateForNode,
   278  		},
   279  		&processors.ClusterAggregator{
   280  			MetricsToAggregate: metricsToAggregate,
   281  		})
   282  
   283  	nodeAutoscalingEnricher, err := processors.NewNodeAutoscalingEnricher(kubernetesUrl, labelCopier)
   284  	if err != nil {
   285  		glog.Fatalf("Failed to create NodeAutoscalingEnricher: %v", err)
   286  	}
   287  	dataProcessors = append(dataProcessors, nodeAutoscalingEnricher)
   288  	return dataProcessors
   289  }
   290  
   291  const (
   292  	minMetricsCount = 1
   293  	maxMetricsDelay = 3 * time.Minute
   294  )
   295  
   296  func healthzChecker(metricSink *metricsink.MetricSink) healthz.HealthzChecker {
   297  	return healthz.NamedCheck("healthz", func(r *http.Request) error {
   298  		batch := metricSink.GetLatestDataBatch()
   299  		if batch == nil {
   300  			return errors.New("could not get the latest data batch")
   301  		}
   302  		if time.Since(batch.Timestamp) > maxMetricsDelay {
   303  			message := fmt.Sprintf("No current data batch available (latest: %s).", batch.Timestamp.String())
   304  			glog.Warningf(message)
   305  			return errors.New(message)
   306  		}
   307  		if len(batch.MetricSets) < minMetricsCount {
   308  			message := fmt.Sprintf("Not enough metrics found in the latest data batch: %d (expected min. %d) %s", len(batch.MetricSets), minMetricsCount, batch.Timestamp.String())
   309  			glog.Warningf(message)
   310  			return errors.New(message)
   311  		}
   312  		return nil
   313  	})
   314  }
   315  
   316  // Gets the address of the kubernetes source from the list of source URIs.
   317  // Possible kubernetes sources are: 'kubernetes' and 'kubernetes.summary_api'
   318  func getKubernetesAddress(args flags.Uris) (*url.URL, error) {
   319  	for _, uri := range args {
   320  		if strings.SplitN(uri.Key, ".", 2)[0] == "kubernetes" {
   321  			return &uri.Val, nil
   322  		}
   323  	}
   324  	return nil, fmt.Errorf("No kubernetes source found.")
   325  }
   326  
   327  func getPodLister(kubeClient *kube_client.Clientset) (v1listers.PodLister, error) {
   328  	lw := cache.NewListWatchFromClient(kubeClient.CoreV1().RESTClient(), "pods", kube_api.NamespaceAll, fields.Everything())
   329  	store := cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc})
   330  	podLister := v1listers.NewPodLister(store)
   331  	reflector := cache.NewReflector(lw, &kube_api.Pod{}, store, time.Hour)
   332  	go reflector.Run(wait.NeverStop)
   333  	return podLister, nil
   334  }
   335  
   336  func validateFlags(opt *options.HeapsterRunOptions) error {
   337  	if opt.MetricResolution < 5*time.Second {
   338  		return fmt.Errorf("metric resolution should not be less than 5 seconds - %d", opt.MetricResolution)
   339  	}
   340  	if (len(opt.TLSCertFile) > 0 && len(opt.TLSKeyFile) == 0) || (len(opt.TLSCertFile) == 0 && len(opt.TLSKeyFile) > 0) {
   341  		return fmt.Errorf("both TLS certificate & key are required to enable TLS serving")
   342  	}
   343  	if len(opt.TLSClientCAFile) > 0 && len(opt.TLSCertFile) == 0 {
   344  		return fmt.Errorf("client cert authentication requires TLS certificate & key")
   345  	}
   346  	return nil
   347  }
   348  
   349  func setMaxProcs(opt *options.HeapsterRunOptions) {
   350  	// Allow as many threads as we have cores unless the user specified a value.
   351  	var numProcs int
   352  	if opt.MaxProcs < 1 {
   353  		numProcs = runtime.NumCPU()
   354  	} else {
   355  		numProcs = opt.MaxProcs
   356  	}
   357  	runtime.GOMAXPROCS(numProcs)
   358  
   359  	// Check if the setting was successful.
   360  	actualNumProcs := runtime.GOMAXPROCS(0)
   361  	if actualNumProcs != numProcs {
   362  		glog.Warningf("Specified max procs of %d but using %d", numProcs, actualNumProcs)
   363  	}
   364  }