github.com/aclisp/heapster@v0.19.2-0.20160613100040-51756f899a96/metrics/sources/manager.go (about)

     1  // Copyright 2015 Google Inc. All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package sources
    16  
    17  import (
    18  	"math/rand"
    19  	"time"
    20  
    21  	. "k8s.io/heapster/metrics/core"
    22  
    23  	"github.com/golang/glog"
    24  	"github.com/prometheus/client_golang/prometheus"
    25  )
    26  
const (
	// DefaultMetricsScrapeTimeout is a default scrape timeout of 20 seconds.
	// NOTE(review): not referenced in this file; presumably passed by callers
	// to NewSourceManager — confirm.
	DefaultMetricsScrapeTimeout = 20 * time.Second
	// MaxDelayMs is the upper bound, in milliseconds, of the random start
	// delay window applied to each source query in ScrapeMetrics.
	MaxDelayMs = 4 * 1000
	// DelayPerSourceMs is multiplied by the number of sources to size the
	// random start delay window, in milliseconds, before MaxDelayMs caps it.
	DelayPerSourceMs = 8
)
    32  
var (
	// lastScrapeTimestamp is a gauge, labelled by source name, holding the
	// last time Heapster performed a scrape, in seconds since the Unix epoch.
	lastScrapeTimestamp = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: "heapster",
			Subsystem: "scraper",
			Name:      "last_time_seconds",
			Help:      "Last time Heapster performed a scrape since unix epoch in seconds.",
		},
		[]string{"source"},
	)

	// scraperDuration is a summary, labelled by source name, of the time
	// spent scraping sources, in microseconds.
	scraperDuration = prometheus.NewSummaryVec(
		prometheus.SummaryOpts{
			Namespace: "heapster",
			Subsystem: "scraper",
			Name:      "duration_microseconds",
			Help:      "Time spent scraping sources in microseconds.",
		},
		[]string{"source"},
	)
)
    56  
    57  func init() {
    58  	prometheus.MustRegister(lastScrapeTimestamp)
    59  	prometheus.MustRegister(scraperDuration)
    60  }
    61  
    62  func NewSourceManager(metricsSourceProvider MetricsSourceProvider, metricsScrapeTimeout time.Duration) (MetricsSource, error) {
    63  	return &sourceManager{
    64  		metricsSourceProvider: metricsSourceProvider,
    65  		metricsScrapeTimeout:  metricsScrapeTimeout,
    66  	}, nil
    67  }
    68  
// sourceManager is the MetricsSource returned by NewSourceManager. It
// aggregates the results of scraping all sources obtained from its provider.
type sourceManager struct {
	// metricsSourceProvider supplies the list of sources queried on each
	// ScrapeMetrics call.
	metricsSourceProvider MetricsSourceProvider
	// metricsScrapeTimeout is added to the scrape start time to compute the
	// deadline after which outstanding source responses are abandoned.
	metricsScrapeTimeout time.Duration
}
    73  
    74  func (this *sourceManager) Name() string {
    75  	return "source_manager"
    76  }
    77  
    78  func (this *sourceManager) ScrapeMetrics(start, end time.Time) *DataBatch {
    79  	glog.Infof("Scraping metrics start: %s, end: %s", start, end)
    80  	sources := this.metricsSourceProvider.GetMetricsSources()
    81  
    82  	responseChannel := make(chan *DataBatch)
    83  	startTime := time.Now()
    84  	timeoutTime := startTime.Add(this.metricsScrapeTimeout)
    85  
    86  	delayMs := DelayPerSourceMs * len(sources)
    87  	if delayMs > MaxDelayMs {
    88  		delayMs = MaxDelayMs
    89  	}
    90  
    91  	for _, source := range sources {
    92  
    93  		go func(source MetricsSource, channel chan *DataBatch, start, end, timeoutTime time.Time, delayInMs int) {
    94  
    95  			// Prevents network congestion.
    96  			time.Sleep(time.Duration(rand.Intn(delayMs)) * time.Millisecond)
    97  
    98  			glog.V(2).Infof("Querying source: %s", source)
    99  			metrics := scrape(source, start, end)
   100  			now := time.Now()
   101  			if !now.Before(timeoutTime) {
   102  				glog.Warningf("Failed to get %s response in time", source)
   103  				return
   104  			}
   105  			timeForResponse := timeoutTime.Sub(now)
   106  
   107  			select {
   108  			case channel <- metrics:
   109  				// passed the response correctly.
   110  				return
   111  			case <-time.After(timeForResponse):
   112  				glog.Warningf("Failed to send the response back %s", source)
   113  				return
   114  			}
   115  		}(source, responseChannel, start, end, timeoutTime, delayMs)
   116  	}
   117  	response := DataBatch{
   118  		Timestamp:  end,
   119  		MetricSets: map[string]*MetricSet{},
   120  	}
   121  
   122  	latencies := make([]int, 11)
   123  
   124  responseloop:
   125  	for i := range sources {
   126  		now := time.Now()
   127  		if !now.Before(timeoutTime) {
   128  			glog.Warningf("Failed to get all responses in time (got %d/%d)", i, len(sources))
   129  			break
   130  		}
   131  
   132  		select {
   133  		case dataBatch := <-responseChannel:
   134  			if dataBatch != nil {
   135  				for key, value := range dataBatch.MetricSets {
   136  					response.MetricSets[key] = value
   137  				}
   138  			}
   139  			latency := now.Sub(startTime)
   140  			bucket := int(latency.Seconds())
   141  			if bucket >= len(latencies) {
   142  				bucket = len(latencies) - 1
   143  			}
   144  			latencies[bucket]++
   145  
   146  		case <-time.After(timeoutTime.Sub(now)):
   147  			glog.Warningf("Failed to get all responses in time (got %d/%d)", i, len(sources))
   148  			break responseloop
   149  		}
   150  	}
   151  
   152  	glog.Infof("ScrapeMetrics: time: %s size: %d", time.Since(startTime), len(response.MetricSets))
   153  	for i, value := range latencies {
   154  		glog.V(1).Infof("   scrape  bucket %d: %d", i, value)
   155  	}
   156  	return &response
   157  }
   158  
   159  func scrape(s MetricsSource, start, end time.Time) *DataBatch {
   160  	sourceName := s.Name()
   161  	startTime := time.Now()
   162  	defer lastScrapeTimestamp.
   163  		WithLabelValues(sourceName).
   164  		Set(float64(time.Now().Unix()))
   165  	defer scraperDuration.
   166  		WithLabelValues(sourceName).
   167  		Observe(float64(time.Since(startTime)) / float64(time.Microsecond))
   168  
   169  	return s.ScrapeMetrics(start, end)
   170  }