github.com/prysmaticlabs/prysm@v1.4.4/shared/clientstats/scrapers.go (about)

     1  package clientstats
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/json"
     6  	"fmt"
     7  	"io"
     8  	"net/http"
     9  	"strconv"
    10  	"time"
    11  
    12  	dto "github.com/prometheus/client_model/go"
    13  	"github.com/prometheus/prom2json"
    14  	eth "github.com/prysmaticlabs/prysm/proto/eth/v1alpha1"
    15  	log "github.com/sirupsen/logrus"
    16  )
    17  
    18  type beaconNodeScraper struct {
    19  	url     string
    20  	tripper http.RoundTripper
    21  }
    22  
    23  func (bc *beaconNodeScraper) Scrape() (io.Reader, error) {
    24  	log.Infof("Scraping beacon-node at %s", bc.url)
    25  	pf, err := scrapeProm(bc.url, bc.tripper)
    26  	if err != nil {
    27  		return nil, err
    28  	}
    29  
    30  	bs := populateBeaconNodeStats(pf)
    31  
    32  	b, err := json.Marshal(bs)
    33  	return bytes.NewBuffer(b), err
    34  }
    35  
    36  // NewBeaconNodeScraper constructs a Scaper capable of scraping
    37  // the prometheus endpoint of a beacon-node process and producing
    38  // the json body for the beaconnode client-stats process type.
    39  func NewBeaconNodeScraper(promExpoURL string) Scraper {
    40  	return &beaconNodeScraper{
    41  		url: promExpoURL,
    42  	}
    43  }
    44  
    45  type validatorScraper struct {
    46  	url     string
    47  	tripper http.RoundTripper
    48  }
    49  
    50  func (vc *validatorScraper) Scrape() (io.Reader, error) {
    51  	log.Infof("Scraping validator at %s", vc.url)
    52  	pf, err := scrapeProm(vc.url, vc.tripper)
    53  	if err != nil {
    54  		return nil, err
    55  	}
    56  
    57  	vs := populateValidatorStats(pf)
    58  
    59  	b, err := json.Marshal(vs)
    60  	return bytes.NewBuffer(b), err
    61  }
    62  
    63  // NewValidatorScraper constructs a Scaper capable of scraping
    64  // the prometheus endpoint of a validator process and producing
    65  // the json body for the validator client-stats process type.
    66  func NewValidatorScraper(promExpoURL string) Scraper {
    67  	return &validatorScraper{
    68  		url: promExpoURL,
    69  	}
    70  }
    71  
    72  // note on tripper -- under the hood FetchMetricFamilies constructs an http.Client,
    73  // which, if transport is nil, will just use the DefaultTransport, so we
    74  // really only bother specifying the transport in tests, otherwise we let
    75  // the zero-value (which is nil) flow through so that the default transport
    76  // will be used.
    77  func scrapeProm(url string, tripper http.RoundTripper) (map[string]*dto.MetricFamily, error) {
    78  	mfChan := make(chan *dto.MetricFamily)
    79  	errChan := make(chan error)
    80  	go func() {
    81  		// FetchMetricFamilies handles grpc flavored prometheus ez
    82  		// but at the cost of the awkward channel select loop below
    83  		err := prom2json.FetchMetricFamilies(url, mfChan, tripper)
    84  		if err != nil {
    85  			errChan <- err
    86  		}
    87  	}()
    88  	result := make(map[string]*dto.MetricFamily)
    89  	// channel select accumulates results from FetchMetricFamilies
    90  	// unless there is an error.
    91  	for {
    92  		select {
    93  		case fam, chanOpen := <-mfChan:
    94  			// FetchMetricFamiles will close the channel when done
    95  			// at which point we want to stop the goroutine
    96  			if fam == nil && !chanOpen {
    97  				return result, nil
    98  			}
    99  			ptr := fam
   100  			result[fam.GetName()] = ptr
   101  		case err := <-errChan:
   102  			return result, err
   103  		}
   104  		if errChan == nil && mfChan == nil {
   105  			return result, nil
   106  		}
   107  	}
   108  }
   109  
   110  type metricMap map[string]*dto.MetricFamily
   111  
   112  func (mm metricMap) getFamily(name string) (*dto.MetricFamily, error) {
   113  	f, ok := mm[name]
   114  	if !ok {
   115  		return nil, fmt.Errorf("scraper did not find metric family %s", name)
   116  	}
   117  	return f, nil
   118  }
   119  
   120  var now = time.Now // var hook for tests to overwrite
   121  var nanosPerMilli = int64(time.Millisecond) / int64(time.Nanosecond)
   122  
   123  func populateAPIMessage(processName string) APIMessage {
   124  	return APIMessage{
   125  		Timestamp:   now().UnixNano() / nanosPerMilli,
   126  		APIVersion:  APIVersion,
   127  		ProcessName: processName,
   128  	}
   129  }
   130  
   131  func populateCommonStats(pf metricMap) CommonStats {
   132  	cs := CommonStats{}
   133  	cs.ClientName = ClientName
   134  	var f *dto.MetricFamily
   135  	var m *dto.Metric
   136  	var err error
   137  
   138  	f, err = pf.getFamily("process_cpu_seconds_total")
   139  	if err != nil {
   140  		log.WithError(err).Debug("Failed to get process_cpu_seconds_total")
   141  	} else {
   142  		m = f.Metric[0]
   143  		// float64->int64: truncates fractional seconds
   144  		cs.CPUProcessSecondsTotal = int64(m.Counter.GetValue())
   145  	}
   146  
   147  	f, err = pf.getFamily("process_resident_memory_bytes")
   148  	if err != nil {
   149  		log.WithError(err).Debug("Failed to get process_resident_memory_bytes")
   150  	} else {
   151  		m = f.Metric[0]
   152  		cs.MemoryProcessBytes = int64(m.Gauge.GetValue())
   153  	}
   154  
   155  	f, err = pf.getFamily("prysm_version")
   156  	if err != nil {
   157  		log.WithError(err).Debug("Failed to get prysm_version")
   158  	} else {
   159  		m = f.Metric[0]
   160  		for _, l := range m.GetLabel() {
   161  			switch l.GetName() {
   162  			case "version":
   163  				cs.ClientVersion = l.GetValue()
   164  			case "buildDate":
   165  				buildDate, err := strconv.Atoi(l.GetValue())
   166  				if err != nil {
   167  					log.WithError(err).Debug("Failed to retrieve buildDate label from the prysm_version metric")
   168  					continue
   169  				}
   170  				cs.ClientBuild = int64(buildDate)
   171  			}
   172  		}
   173  	}
   174  
   175  	return cs
   176  }
   177  
   178  func populateBeaconNodeStats(pf metricMap) BeaconNodeStats {
   179  	var err error
   180  	bs := BeaconNodeStats{}
   181  	bs.CommonStats = populateCommonStats(pf)
   182  	bs.APIMessage = populateAPIMessage(BeaconNodeProcessName)
   183  
   184  	var f *dto.MetricFamily
   185  	var m *dto.Metric
   186  
   187  	f, err = pf.getFamily("beacon_head_slot")
   188  	if err != nil {
   189  		log.WithError(err).Debug("Failed to get beacon_head_slot")
   190  	} else {
   191  		m = f.Metric[0]
   192  		bs.SyncBeaconHeadSlot = int64(m.Gauge.GetValue())
   193  	}
   194  
   195  	f, err = pf.getFamily("beacon_clock_time_slot")
   196  	if err != nil {
   197  		log.WithError(err).Debug("Failed to get beacon_clock_time_slot")
   198  	} else {
   199  		m = f.Metric[0]
   200  		if int64(m.Gauge.GetValue()) == bs.SyncBeaconHeadSlot {
   201  			bs.SyncEth2Synced = true
   202  		}
   203  	}
   204  
   205  	f, err = pf.getFamily("bcnode_disk_beaconchain_bytes_total")
   206  	if err != nil {
   207  		log.WithError(err).Debug("Failed to get bcnode_disk_beaconchain_bytes_total")
   208  	} else {
   209  		m = f.Metric[0]
   210  		bs.DiskBeaconchainBytesTotal = int64(m.Gauge.GetValue())
   211  	}
   212  
   213  	f, err = pf.getFamily("p2p_peer_count")
   214  	if err != nil {
   215  		log.WithError(err).Debug("Failed to get p2p_peer_count")
   216  	} else {
   217  		for _, m := range f.Metric {
   218  			for _, l := range m.GetLabel() {
   219  				if l.GetName() == "state" {
   220  					if l.GetValue() == "Connected" {
   221  						bs.NetworkPeersConnected = int64(m.Gauge.GetValue())
   222  					}
   223  				}
   224  			}
   225  		}
   226  	}
   227  
   228  	f, err = pf.getFamily("powchain_sync_eth1_connected")
   229  	if err != nil {
   230  		log.WithError(err).Debug("Failed to get powchain_sync_eth1_connected")
   231  	} else {
   232  		m = f.Metric[0]
   233  		bs.SyncEth1Connected = false
   234  		if int64(m.Gauge.GetValue()) == 1 {
   235  			bs.SyncEth1Connected = true
   236  		}
   237  	}
   238  
   239  	f, err = pf.getFamily("powchain_sync_eth1_fallback_configured")
   240  	if err != nil {
   241  		log.WithError(err).Debug("Failed to get powchain_sync_eth1_fallback_configured")
   242  	} else {
   243  		m = f.Metric[0]
   244  		bs.SyncEth1FallbackConfigured = false
   245  		if int64(m.Gauge.GetValue()) == 1 {
   246  			bs.SyncEth1FallbackConfigured = true
   247  		}
   248  	}
   249  
   250  	f, err = pf.getFamily("powchain_sync_eth1_fallback_connected")
   251  	if err != nil {
   252  		log.WithError(err).Debug("Failed to get powchain_sync_eth1_fallback_connected")
   253  	} else {
   254  		m = f.Metric[0]
   255  		bs.SyncEth1FallbackConnected = false
   256  		if int64(m.Gauge.GetValue()) == 1 {
   257  			bs.SyncEth1FallbackConnected = true
   258  		}
   259  	}
   260  
   261  	return bs
   262  }
   263  
   264  func statusIsActive(statusCode int64) bool {
   265  	s := eth.ValidatorStatus(statusCode)
   266  	return s.String() == "ACTIVE"
   267  }
   268  
   269  func populateValidatorStats(pf metricMap) ValidatorStats {
   270  	var err error
   271  	vs := ValidatorStats{}
   272  	vs.CommonStats = populateCommonStats(pf)
   273  	vs.APIMessage = populateAPIMessage(ValidatorProcessName)
   274  
   275  	f, err := pf.getFamily("validator_statuses")
   276  	if err != nil {
   277  		log.WithError(err).Debug("Failed to get validator_statuses")
   278  	} else {
   279  		for _, m := range f.Metric {
   280  			if statusIsActive(int64(m.Gauge.GetValue())) {
   281  				vs.ValidatorActive += 1
   282  			}
   283  			vs.ValidatorTotal += 1
   284  		}
   285  	}
   286  
   287  	return vs
   288  }