github.com/aclisp/heapster@v0.19.2-0.20160613100040-51756f899a96/metrics/sources/manager.go

// Copyright 2015 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package sources

import (
	"math/rand"
	"time"

	. "k8s.io/heapster/metrics/core"

	"github.com/golang/glog"
	"github.com/prometheus/client_golang/prometheus"
)

const (
	DefaultMetricsScrapeTimeout = 20 * time.Second
	MaxDelayMs                  = 4 * 1000
	DelayPerSourceMs            = 8
)

var (
	// Last time Heapster performed a scrape since unix epoch in seconds.
	lastScrapeTimestamp = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: "heapster",
			Subsystem: "scraper",
			Name:      "last_time_seconds",
			Help:      "Last time Heapster performed a scrape since unix epoch in seconds.",
		},
		[]string{"source"},
	)

	// Time spent scraping sources in microseconds.
	scraperDuration = prometheus.NewSummaryVec(
		prometheus.SummaryOpts{
			Namespace: "heapster",
			Subsystem: "scraper",
			Name:      "duration_microseconds",
			Help:      "Time spent scraping sources in microseconds.",
		},
		[]string{"source"},
	)
)

func init() {
	prometheus.MustRegister(lastScrapeTimestamp)
	prometheus.MustRegister(scraperDuration)
}

func NewSourceManager(metricsSourceProvider MetricsSourceProvider, metricsScrapeTimeout time.Duration) (MetricsSource, error) {
	return &sourceManager{
		metricsSourceProvider: metricsSourceProvider,
		metricsScrapeTimeout:  metricsScrapeTimeout,
	}, nil
}

type sourceManager struct {
	metricsSourceProvider MetricsSourceProvider
	metricsScrapeTimeout  time.Duration
}

func (this *sourceManager) Name() string {
	return "source_manager"
}

func (this *sourceManager) ScrapeMetrics(start, end time.Time) *DataBatch {
	glog.Infof("Scraping metrics start: %s, end: %s", start, end)
	sources := this.metricsSourceProvider.GetMetricsSources()

	responseChannel := make(chan *DataBatch)
	startTime := time.Now()
	timeoutTime := startTime.Add(this.metricsScrapeTimeout)

	delayMs := DelayPerSourceMs * len(sources)
	if delayMs > MaxDelayMs {
		delayMs = MaxDelayMs
	}

	for _, source := range sources {
		go func(source MetricsSource, channel chan *DataBatch, start, end, timeoutTime time.Time, delayInMs int) {
			// Stagger the queries with a random delay to prevent network congestion.
			time.Sleep(time.Duration(rand.Intn(delayInMs)) * time.Millisecond)

			glog.V(2).Infof("Querying source: %s", source)
			metrics := scrape(source, start, end)
			now := time.Now()
			if !now.Before(timeoutTime) {
				glog.Warningf("Failed to get %s response in time", source)
				return
			}
			timeForResponse := timeoutTime.Sub(now)

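			// Race the send against the remaining time budget: the response
			// channel is unbuffered, so if the collector below has already
			// timed out and stopped receiving, the time.After case fires and
			// the batch is dropped instead of leaking this goroutine.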
			select {
			case channel <- metrics:
				// Sent the response successfully.
				return
			case <-time.After(timeForResponse):
				glog.Warningf("Failed to send back the response from %s", source)
				return
			}
		}(source, responseChannel, start, end, timeoutTime, delayMs)
	}

	response := DataBatch{
		Timestamp:  end,
		MetricSets: map[string]*MetricSet{},
	}

	// Histogram of response latencies in one-second buckets; the last bucket
	// collects everything at or above ten seconds.
	latencies := make([]int, 11)

responseloop:
	for i := range sources {
		now := time.Now()
		if !now.Before(timeoutTime) {
			glog.Warningf("Failed to get all responses in time (got %d/%d)", i, len(sources))
			break
		}

		select {
		case dataBatch := <-responseChannel:
			if dataBatch != nil {
				for key, value := range dataBatch.MetricSets {
					response.MetricSets[key] = value
				}
			}
			// Measure latency at the moment the response arrives.
			latency := time.Since(startTime)
			bucket := int(latency.Seconds())
			if bucket >= len(latencies) {
				bucket = len(latencies) - 1
			}
			latencies[bucket]++

		case <-time.After(timeoutTime.Sub(now)):
			glog.Warningf("Failed to get all responses in time (got %d/%d)", i, len(sources))
			break responseloop
		}
	}

	glog.Infof("ScrapeMetrics: time: %s size: %d", time.Since(startTime), len(response.MetricSets))
	for i, value := range latencies {
		glog.V(1).Infof("   scrape bucket %d: %d", i, value)
	}
	return &response
}

func scrape(s MetricsSource, start, end time.Time) *DataBatch {
	sourceName := s.Name()
	startTime := time.Now()
	// Record the Prometheus metrics in a deferred closure: deferring the
	// metric calls directly would evaluate time.Now() and
	// time.Since(startTime) at defer time, before the scrape runs, stamping
	// the wrong timestamp and a near-zero duration.
	defer func() {
		lastScrapeTimestamp.
			WithLabelValues(sourceName).
			Set(float64(time.Now().Unix()))
		scraperDuration.
			WithLabelValues(sourceName).
			Observe(float64(time.Since(startTime)) / float64(time.Microsecond))
	}()

	return s.ScrapeMetrics(start, end)
}
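
// stubSource and stubProvider below are a minimal illustrative sketch, not
// part of Heapster itself: hypothetical stand-ins showing how the two
// interfaces from k8s.io/heapster/metrics/core (MetricsSource and
// MetricsSourceProvider) plug into NewSourceManager. Real providers, such as
// the kubelet source, implement the same interfaces.
type stubSource struct {
	name string
}

func (s *stubSource) Name() string { return s.name }

func (s *stubSource) ScrapeMetrics(start, end time.Time) *DataBatch {
	// Return an empty batch stamped with the end of the scrape window.
	return &DataBatch{Timestamp: end, MetricSets: map[string]*MetricSet{}}
}

type stubProvider struct {
	sources []MetricsSource
}

func (p *stubProvider) GetMetricsSources() []MetricsSource { return p.sources }

// exampleScrapeOnce is a hypothetical usage sketch: it wires two stub sources
// into a source manager and runs a single scrape cycle over the last minute.
func exampleScrapeOnce() *DataBatch {
	provider := &stubProvider{sources: []MetricsSource{
		&stubSource{name: "stub-1"},
		&stubSource{name: "stub-2"},
	}}
	manager, err := NewSourceManager(provider, DefaultMetricsScrapeTimeout)
	if err != nil {
		glog.Fatalf("failed to create source manager: %v", err)
	}
	end := time.Now()
	return manager.ScrapeMetrics(end.Add(-time.Minute), end)
}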