github.com/prysmaticlabs/prysm@v1.4.4/shared/clientstats/scrapers.go (about) 1 package clientstats 2 3 import ( 4 "bytes" 5 "encoding/json" 6 "fmt" 7 "io" 8 "net/http" 9 "strconv" 10 "time" 11 12 dto "github.com/prometheus/client_model/go" 13 "github.com/prometheus/prom2json" 14 eth "github.com/prysmaticlabs/prysm/proto/eth/v1alpha1" 15 log "github.com/sirupsen/logrus" 16 ) 17 18 type beaconNodeScraper struct { 19 url string 20 tripper http.RoundTripper 21 } 22 23 func (bc *beaconNodeScraper) Scrape() (io.Reader, error) { 24 log.Infof("Scraping beacon-node at %s", bc.url) 25 pf, err := scrapeProm(bc.url, bc.tripper) 26 if err != nil { 27 return nil, err 28 } 29 30 bs := populateBeaconNodeStats(pf) 31 32 b, err := json.Marshal(bs) 33 return bytes.NewBuffer(b), err 34 } 35 36 // NewBeaconNodeScraper constructs a Scaper capable of scraping 37 // the prometheus endpoint of a beacon-node process and producing 38 // the json body for the beaconnode client-stats process type. 39 func NewBeaconNodeScraper(promExpoURL string) Scraper { 40 return &beaconNodeScraper{ 41 url: promExpoURL, 42 } 43 } 44 45 type validatorScraper struct { 46 url string 47 tripper http.RoundTripper 48 } 49 50 func (vc *validatorScraper) Scrape() (io.Reader, error) { 51 log.Infof("Scraping validator at %s", vc.url) 52 pf, err := scrapeProm(vc.url, vc.tripper) 53 if err != nil { 54 return nil, err 55 } 56 57 vs := populateValidatorStats(pf) 58 59 b, err := json.Marshal(vs) 60 return bytes.NewBuffer(b), err 61 } 62 63 // NewValidatorScraper constructs a Scaper capable of scraping 64 // the prometheus endpoint of a validator process and producing 65 // the json body for the validator client-stats process type. 66 func NewValidatorScraper(promExpoURL string) Scraper { 67 return &validatorScraper{ 68 url: promExpoURL, 69 } 70 } 71 72 // note on tripper -- under the hood FetchMetricFamilies constructs an http.Client, 73 // which, if transport is nil, will just use the DefaultTransport, so we 74 // really only bother specifying the transport in tests, otherwise we let 75 // the zero-value (which is nil) flow through so that the default transport 76 // will be used. 77 func scrapeProm(url string, tripper http.RoundTripper) (map[string]*dto.MetricFamily, error) { 78 mfChan := make(chan *dto.MetricFamily) 79 errChan := make(chan error) 80 go func() { 81 // FetchMetricFamilies handles grpc flavored prometheus ez 82 // but at the cost of the awkward channel select loop below 83 err := prom2json.FetchMetricFamilies(url, mfChan, tripper) 84 if err != nil { 85 errChan <- err 86 } 87 }() 88 result := make(map[string]*dto.MetricFamily) 89 // channel select accumulates results from FetchMetricFamilies 90 // unless there is an error. 91 for { 92 select { 93 case fam, chanOpen := <-mfChan: 94 // FetchMetricFamiles will close the channel when done 95 // at which point we want to stop the goroutine 96 if fam == nil && !chanOpen { 97 return result, nil 98 } 99 ptr := fam 100 result[fam.GetName()] = ptr 101 case err := <-errChan: 102 return result, err 103 } 104 if errChan == nil && mfChan == nil { 105 return result, nil 106 } 107 } 108 } 109 110 type metricMap map[string]*dto.MetricFamily 111 112 func (mm metricMap) getFamily(name string) (*dto.MetricFamily, error) { 113 f, ok := mm[name] 114 if !ok { 115 return nil, fmt.Errorf("scraper did not find metric family %s", name) 116 } 117 return f, nil 118 } 119 120 var now = time.Now // var hook for tests to overwrite 121 var nanosPerMilli = int64(time.Millisecond) / int64(time.Nanosecond) 122 123 func populateAPIMessage(processName string) APIMessage { 124 return APIMessage{ 125 Timestamp: now().UnixNano() / nanosPerMilli, 126 APIVersion: APIVersion, 127 ProcessName: processName, 128 } 129 } 130 131 func populateCommonStats(pf metricMap) CommonStats { 132 cs := CommonStats{} 133 cs.ClientName = ClientName 134 var f *dto.MetricFamily 135 var m *dto.Metric 136 var err error 137 138 f, err = pf.getFamily("process_cpu_seconds_total") 139 if err != nil { 140 log.WithError(err).Debug("Failed to get process_cpu_seconds_total") 141 } else { 142 m = f.Metric[0] 143 // float64->int64: truncates fractional seconds 144 cs.CPUProcessSecondsTotal = int64(m.Counter.GetValue()) 145 } 146 147 f, err = pf.getFamily("process_resident_memory_bytes") 148 if err != nil { 149 log.WithError(err).Debug("Failed to get process_resident_memory_bytes") 150 } else { 151 m = f.Metric[0] 152 cs.MemoryProcessBytes = int64(m.Gauge.GetValue()) 153 } 154 155 f, err = pf.getFamily("prysm_version") 156 if err != nil { 157 log.WithError(err).Debug("Failed to get prysm_version") 158 } else { 159 m = f.Metric[0] 160 for _, l := range m.GetLabel() { 161 switch l.GetName() { 162 case "version": 163 cs.ClientVersion = l.GetValue() 164 case "buildDate": 165 buildDate, err := strconv.Atoi(l.GetValue()) 166 if err != nil { 167 log.WithError(err).Debug("Failed to retrieve buildDate label from the prysm_version metric") 168 continue 169 } 170 cs.ClientBuild = int64(buildDate) 171 } 172 } 173 } 174 175 return cs 176 } 177 178 func populateBeaconNodeStats(pf metricMap) BeaconNodeStats { 179 var err error 180 bs := BeaconNodeStats{} 181 bs.CommonStats = populateCommonStats(pf) 182 bs.APIMessage = populateAPIMessage(BeaconNodeProcessName) 183 184 var f *dto.MetricFamily 185 var m *dto.Metric 186 187 f, err = pf.getFamily("beacon_head_slot") 188 if err != nil { 189 log.WithError(err).Debug("Failed to get beacon_head_slot") 190 } else { 191 m = f.Metric[0] 192 bs.SyncBeaconHeadSlot = int64(m.Gauge.GetValue()) 193 } 194 195 f, err = pf.getFamily("beacon_clock_time_slot") 196 if err != nil { 197 log.WithError(err).Debug("Failed to get beacon_clock_time_slot") 198 } else { 199 m = f.Metric[0] 200 if int64(m.Gauge.GetValue()) == bs.SyncBeaconHeadSlot { 201 bs.SyncEth2Synced = true 202 } 203 } 204 205 f, err = pf.getFamily("bcnode_disk_beaconchain_bytes_total") 206 if err != nil { 207 log.WithError(err).Debug("Failed to get bcnode_disk_beaconchain_bytes_total") 208 } else { 209 m = f.Metric[0] 210 bs.DiskBeaconchainBytesTotal = int64(m.Gauge.GetValue()) 211 } 212 213 f, err = pf.getFamily("p2p_peer_count") 214 if err != nil { 215 log.WithError(err).Debug("Failed to get p2p_peer_count") 216 } else { 217 for _, m := range f.Metric { 218 for _, l := range m.GetLabel() { 219 if l.GetName() == "state" { 220 if l.GetValue() == "Connected" { 221 bs.NetworkPeersConnected = int64(m.Gauge.GetValue()) 222 } 223 } 224 } 225 } 226 } 227 228 f, err = pf.getFamily("powchain_sync_eth1_connected") 229 if err != nil { 230 log.WithError(err).Debug("Failed to get powchain_sync_eth1_connected") 231 } else { 232 m = f.Metric[0] 233 bs.SyncEth1Connected = false 234 if int64(m.Gauge.GetValue()) == 1 { 235 bs.SyncEth1Connected = true 236 } 237 } 238 239 f, err = pf.getFamily("powchain_sync_eth1_fallback_configured") 240 if err != nil { 241 log.WithError(err).Debug("Failed to get powchain_sync_eth1_fallback_configured") 242 } else { 243 m = f.Metric[0] 244 bs.SyncEth1FallbackConfigured = false 245 if int64(m.Gauge.GetValue()) == 1 { 246 bs.SyncEth1FallbackConfigured = true 247 } 248 } 249 250 f, err = pf.getFamily("powchain_sync_eth1_fallback_connected") 251 if err != nil { 252 log.WithError(err).Debug("Failed to get powchain_sync_eth1_fallback_connected") 253 } else { 254 m = f.Metric[0] 255 bs.SyncEth1FallbackConnected = false 256 if int64(m.Gauge.GetValue()) == 1 { 257 bs.SyncEth1FallbackConnected = true 258 } 259 } 260 261 return bs 262 } 263 264 func statusIsActive(statusCode int64) bool { 265 s := eth.ValidatorStatus(statusCode) 266 return s.String() == "ACTIVE" 267 } 268 269 func populateValidatorStats(pf metricMap) ValidatorStats { 270 var err error 271 vs := ValidatorStats{} 272 vs.CommonStats = populateCommonStats(pf) 273 vs.APIMessage = populateAPIMessage(ValidatorProcessName) 274 275 f, err := pf.getFamily("validator_statuses") 276 if err != nil { 277 log.WithError(err).Debug("Failed to get validator_statuses") 278 } else { 279 for _, m := range f.Metric { 280 if statusIsActive(int64(m.Gauge.GetValue())) { 281 vs.ValidatorActive += 1 282 } 283 vs.ValidatorTotal += 1 284 } 285 } 286 287 return vs 288 }