github.com/galamsiva2020/kubernetes-heapster-monitoring@v0.0.0-20210823134957-3c1baa7c1e70/metrics/sources/manager.go (about)

     1  // Copyright 2015 Google Inc. All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package sources
    16  
    17  import (
    18  	"math/rand"
    19  	"time"
    20  
    21  	. "k8s.io/heapster/metrics/core"
    22  
    23  	"github.com/golang/glog"
    24  	"github.com/prometheus/client_golang/prometheus"
    25  )
    26  
const (
	// DefaultMetricsScrapeTimeout is a 20 second scrape timeout. It is not
	// referenced in this file; presumably callers pass it to NewSourceManager
	// as metricsScrapeTimeout (verify against callers).
	DefaultMetricsScrapeTimeout = 20 * time.Second
	// MaxDelayMs caps, in milliseconds, the range from which ScrapeMetrics
	// draws the random delay applied before each source is queried.
	MaxDelayMs                  = 4 * 1000
	// DelayPerSourceMs is multiplied by the number of sources to size that
	// random delay range (in milliseconds) before the MaxDelayMs cap is applied.
	DelayPerSourceMs            = 8
)
    32  
var (
	// lastScrapeTimestamp holds, per "source" label, the last time Heapster
	// performed a scrape, in seconds since the unix epoch. It is set by scrape
	// when a source's ScrapeMetrics call returns, whether or not it succeeded.
	lastScrapeTimestamp = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: "heapster",
			Subsystem: "scraper",
			Name:      "last_time_seconds",
			Help:      "Last time Heapster performed a scrape since unix epoch in seconds.",
		},
		[]string{"source"},
	)

	// scraperDuration summarizes, per "source" label, the time spent scraping
	// a source, in milliseconds. scrape records one observation per call.
	scraperDuration = prometheus.NewSummaryVec(
		prometheus.SummaryOpts{
			Namespace: "heapster",
			Subsystem: "scraper",
			Name:      "duration_milliseconds",
			Help:      "Time spent scraping sources in milliseconds.",
		},
		[]string{"source"},
	)
)
    56  
    57  func init() {
    58  	prometheus.MustRegister(lastScrapeTimestamp)
    59  	prometheus.MustRegister(scraperDuration)
    60  }
    61  
    62  func NewSourceManager(metricsSourceProvider MetricsSourceProvider, metricsScrapeTimeout time.Duration) (MetricsSource, error) {
    63  	return &sourceManager{
    64  		metricsSourceProvider: metricsSourceProvider,
    65  		metricsScrapeTimeout:  metricsScrapeTimeout,
    66  	}, nil
    67  }
    68  
// sourceManager is the MetricsSource returned by NewSourceManager. Its
// ScrapeMetrics method queries every source obtained from the provider and
// merges their results.
type sourceManager struct {
	// metricsSourceProvider supplies the sources queried on each
	// ScrapeMetrics call.
	metricsSourceProvider MetricsSourceProvider
	// metricsScrapeTimeout bounds how long a single ScrapeMetrics call waits
	// for sources to respond.
	metricsScrapeTimeout  time.Duration
}
    73  
    74  func (this *sourceManager) Name() string {
    75  	return "source_manager"
    76  }
    77  
    78  func (this *sourceManager) ScrapeMetrics(start, end time.Time) (*DataBatch, error) {
    79  	glog.V(1).Infof("Scraping metrics start: %s, end: %s", start, end)
    80  	sources := this.metricsSourceProvider.GetMetricsSources()
    81  
    82  	responseChannel := make(chan *DataBatch)
    83  	startTime := time.Now()
    84  	timeoutTime := startTime.Add(this.metricsScrapeTimeout)
    85  
    86  	delayMs := DelayPerSourceMs * len(sources)
    87  	if delayMs > MaxDelayMs {
    88  		delayMs = MaxDelayMs
    89  	}
    90  
    91  	for _, source := range sources {
    92  
    93  		go func(source MetricsSource, channel chan *DataBatch, start, end, timeoutTime time.Time, delayInMs int) {
    94  
    95  			// Prevents network congestion.
    96  			time.Sleep(time.Duration(rand.Intn(delayMs)) * time.Millisecond)
    97  
    98  			glog.V(2).Infof("Querying source: %s", source)
    99  			metrics, err := scrape(source, start, end)
   100  			if err != nil {
   101  				glog.Errorf("Error in scraping containers from %s: %v", source.Name(), err)
   102  				return
   103  			}
   104  
   105  			now := time.Now()
   106  			if !now.Before(timeoutTime) {
   107  				glog.Warningf("Failed to get %s response in time", source)
   108  				return
   109  			}
   110  			timeForResponse := timeoutTime.Sub(now)
   111  
   112  			select {
   113  			case channel <- metrics:
   114  				// passed the response correctly.
   115  				return
   116  			case <-time.After(timeForResponse):
   117  				glog.Warningf("Failed to send the response back %s", source)
   118  				return
   119  			}
   120  		}(source, responseChannel, start, end, timeoutTime, delayMs)
   121  	}
   122  	response := DataBatch{
   123  		Timestamp:  end,
   124  		MetricSets: map[string]*MetricSet{},
   125  	}
   126  
   127  	latencies := make([]int, 11)
   128  
   129  responseloop:
   130  	for i := range sources {
   131  		now := time.Now()
   132  		if !now.Before(timeoutTime) {
   133  			glog.Warningf("Failed to get all responses in time (got %d/%d)", i, len(sources))
   134  			break
   135  		}
   136  
   137  		select {
   138  		case dataBatch := <-responseChannel:
   139  			if dataBatch != nil {
   140  				for key, value := range dataBatch.MetricSets {
   141  					response.MetricSets[key] = value
   142  				}
   143  			}
   144  			latency := now.Sub(startTime)
   145  			bucket := int(latency.Seconds())
   146  			if bucket >= len(latencies) {
   147  				bucket = len(latencies) - 1
   148  			}
   149  			latencies[bucket]++
   150  
   151  		case <-time.After(timeoutTime.Sub(now)):
   152  			glog.Warningf("Failed to get all responses in time (got %d/%d)", i, len(sources))
   153  			break responseloop
   154  		}
   155  	}
   156  
   157  	glog.V(1).Infof("ScrapeMetrics: time: %s size: %d", time.Since(startTime), len(response.MetricSets))
   158  	for i, value := range latencies {
   159  		glog.V(1).Infof("   scrape  bucket %d: %d", i, value)
   160  	}
   161  	return &response, nil
   162  }
   163  
   164  func scrape(s MetricsSource, start, end time.Time) (*DataBatch, error) {
   165  	sourceName := s.Name()
   166  	startTime := time.Now()
   167  	defer func() {
   168  		lastScrapeTimestamp.
   169  			WithLabelValues(sourceName).
   170  			Set(float64(time.Now().Unix()))
   171  		scraperDuration.
   172  			WithLabelValues(sourceName).
   173  			Observe(float64(time.Since(startTime)) / float64(time.Millisecond))
   174  	}()
   175  
   176  	return s.ScrapeMetrics(start, end)
   177  }