github.com/galamsiva2020/kubernetes-heapster-monitoring@v0.0.0-20210823134957-3c1baa7c1e70/metrics/sources/manager.go (about) 1 // Copyright 2015 Google Inc. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package sources 16 17 import ( 18 "math/rand" 19 "time" 20 21 . "k8s.io/heapster/metrics/core" 22 23 "github.com/golang/glog" 24 "github.com/prometheus/client_golang/prometheus" 25 ) 26 27 const ( 28 DefaultMetricsScrapeTimeout = 20 * time.Second 29 MaxDelayMs = 4 * 1000 30 DelayPerSourceMs = 8 31 ) 32 33 var ( 34 // Last time Heapster performed a scrape since unix epoch in seconds. 35 lastScrapeTimestamp = prometheus.NewGaugeVec( 36 prometheus.GaugeOpts{ 37 Namespace: "heapster", 38 Subsystem: "scraper", 39 Name: "last_time_seconds", 40 Help: "Last time Heapster performed a scrape since unix epoch in seconds.", 41 }, 42 []string{"source"}, 43 ) 44 45 // Time spent exporting scraping sources in milliseconds. 46 scraperDuration = prometheus.NewSummaryVec( 47 prometheus.SummaryOpts{ 48 Namespace: "heapster", 49 Subsystem: "scraper", 50 Name: "duration_milliseconds", 51 Help: "Time spent scraping sources in milliseconds.", 52 }, 53 []string{"source"}, 54 ) 55 ) 56 57 func init() { 58 prometheus.MustRegister(lastScrapeTimestamp) 59 prometheus.MustRegister(scraperDuration) 60 } 61 62 func NewSourceManager(metricsSourceProvider MetricsSourceProvider, metricsScrapeTimeout time.Duration) (MetricsSource, error) { 63 return &sourceManager{ 64 metricsSourceProvider: metricsSourceProvider, 65 metricsScrapeTimeout: metricsScrapeTimeout, 66 }, nil 67 } 68 69 type sourceManager struct { 70 metricsSourceProvider MetricsSourceProvider 71 metricsScrapeTimeout time.Duration 72 } 73 74 func (this *sourceManager) Name() string { 75 return "source_manager" 76 } 77 78 func (this *sourceManager) ScrapeMetrics(start, end time.Time) (*DataBatch, error) { 79 glog.V(1).Infof("Scraping metrics start: %s, end: %s", start, end) 80 sources := this.metricsSourceProvider.GetMetricsSources() 81 82 responseChannel := make(chan *DataBatch) 83 startTime := time.Now() 84 timeoutTime := startTime.Add(this.metricsScrapeTimeout) 85 86 delayMs := DelayPerSourceMs * len(sources) 87 if delayMs > MaxDelayMs { 88 delayMs = MaxDelayMs 89 } 90 91 for _, source := range sources { 92 93 go func(source MetricsSource, channel chan *DataBatch, start, end, timeoutTime time.Time, delayInMs int) { 94 95 // Prevents network congestion. 96 time.Sleep(time.Duration(rand.Intn(delayMs)) * time.Millisecond) 97 98 glog.V(2).Infof("Querying source: %s", source) 99 metrics, err := scrape(source, start, end) 100 if err != nil { 101 glog.Errorf("Error in scraping containers from %s: %v", source.Name(), err) 102 return 103 } 104 105 now := time.Now() 106 if !now.Before(timeoutTime) { 107 glog.Warningf("Failed to get %s response in time", source) 108 return 109 } 110 timeForResponse := timeoutTime.Sub(now) 111 112 select { 113 case channel <- metrics: 114 // passed the response correctly. 115 return 116 case <-time.After(timeForResponse): 117 glog.Warningf("Failed to send the response back %s", source) 118 return 119 } 120 }(source, responseChannel, start, end, timeoutTime, delayMs) 121 } 122 response := DataBatch{ 123 Timestamp: end, 124 MetricSets: map[string]*MetricSet{}, 125 } 126 127 latencies := make([]int, 11) 128 129 responseloop: 130 for i := range sources { 131 now := time.Now() 132 if !now.Before(timeoutTime) { 133 glog.Warningf("Failed to get all responses in time (got %d/%d)", i, len(sources)) 134 break 135 } 136 137 select { 138 case dataBatch := <-responseChannel: 139 if dataBatch != nil { 140 for key, value := range dataBatch.MetricSets { 141 response.MetricSets[key] = value 142 } 143 } 144 latency := now.Sub(startTime) 145 bucket := int(latency.Seconds()) 146 if bucket >= len(latencies) { 147 bucket = len(latencies) - 1 148 } 149 latencies[bucket]++ 150 151 case <-time.After(timeoutTime.Sub(now)): 152 glog.Warningf("Failed to get all responses in time (got %d/%d)", i, len(sources)) 153 break responseloop 154 } 155 } 156 157 glog.V(1).Infof("ScrapeMetrics: time: %s size: %d", time.Since(startTime), len(response.MetricSets)) 158 for i, value := range latencies { 159 glog.V(1).Infof(" scrape bucket %d: %d", i, value) 160 } 161 return &response, nil 162 } 163 164 func scrape(s MetricsSource, start, end time.Time) (*DataBatch, error) { 165 sourceName := s.Name() 166 startTime := time.Now() 167 defer func() { 168 lastScrapeTimestamp. 169 WithLabelValues(sourceName). 170 Set(float64(time.Now().Unix())) 171 scraperDuration. 172 WithLabelValues(sourceName). 173 Observe(float64(time.Since(startTime)) / float64(time.Millisecond)) 174 }() 175 176 return s.ScrapeMetrics(start, end) 177 }