github.com/prysmaticlabs/prysm@v1.4.4/shared/prometheus/service.go (about)

     1  // Package prometheus defines a service which is used for metrics collection
     2  // and health of a node in Prysm.
     3  package prometheus
     4  
     5  import (
     6  	"bytes"
     7  	"context"
     8  	"fmt"
     9  	"net"
    10  	"net/http"
    11  	"runtime/debug"
    12  	"runtime/pprof"
    13  	"time"
    14  
    15  	"github.com/prometheus/client_golang/prometheus"
    16  	"github.com/prometheus/client_golang/prometheus/promhttp"
    17  	"github.com/prysmaticlabs/prysm/shared"
    18  	"github.com/sirupsen/logrus"
    19  )
    20  
    21  var log = logrus.WithField("prefix", "prometheus")
    22  
    23  // Service provides Prometheus metrics via the /metrics route. This route will
    24  // show all the metrics registered with the Prometheus DefaultRegisterer.
    25  type Service struct {
    26  	server      *http.Server
    27  	svcRegistry *shared.ServiceRegistry
    28  	failStatus  error
    29  }
    30  
    31  // Handler represents a path and handler func to serve on the same port as /metrics, /healthz, /goroutinez, etc.
    32  type Handler struct {
    33  	Path    string
    34  	Handler func(http.ResponseWriter, *http.Request)
    35  }
    36  
    37  // NewService sets up a new instance for a given address host:port.
    38  // An empty host will match with any IP so an address like ":2121" is perfectly acceptable.
    39  func NewService(addr string, svcRegistry *shared.ServiceRegistry, additionalHandlers ...Handler) *Service {
    40  	s := &Service{svcRegistry: svcRegistry}
    41  
    42  	mux := http.NewServeMux()
    43  	mux.Handle("/metrics", promhttp.HandlerFor(prometheus.DefaultGatherer, promhttp.HandlerOpts{
    44  		MaxRequestsInFlight: 5,
    45  		Timeout:             30 * time.Second,
    46  	}))
    47  	mux.HandleFunc("/healthz", s.healthzHandler)
    48  	mux.HandleFunc("/goroutinez", s.goroutinezHandler)
    49  
    50  	// Register additional handlers.
    51  	for _, h := range additionalHandlers {
    52  		mux.HandleFunc(h.Path, h.Handler)
    53  	}
    54  
    55  	s.server = &http.Server{Addr: addr, Handler: mux}
    56  
    57  	return s
    58  }
    59  
    60  func (s *Service) healthzHandler(w http.ResponseWriter, r *http.Request) {
    61  	response := generatedResponse{}
    62  
    63  	type serviceStatus struct {
    64  		Name   string `json:"service"`
    65  		Status bool   `json:"status"`
    66  		Err    string `json:"error"`
    67  	}
    68  	var hasError bool
    69  	var statuses []serviceStatus
    70  	for k, v := range s.svcRegistry.Statuses() {
    71  		s := serviceStatus{
    72  			Name:   k.String(),
    73  			Status: true,
    74  		}
    75  		if v != nil {
    76  			s.Status = false
    77  			s.Err = v.Error()
    78  			if s.Err != "" {
    79  				hasError = true
    80  			}
    81  		}
    82  		statuses = append(statuses, s)
    83  	}
    84  	response.Data = statuses
    85  
    86  	if hasError {
    87  		w.WriteHeader(http.StatusServiceUnavailable)
    88  	} else {
    89  		w.WriteHeader(http.StatusOK)
    90  	}
    91  
    92  	// Handle plain text content.
    93  	if contentType := negotiateContentType(r); contentType == contentTypePlainText {
    94  		var buf bytes.Buffer
    95  		for _, s := range statuses {
    96  			var status string
    97  			if s.Status {
    98  				status = "OK"
    99  			} else {
   100  				status = "ERROR, " + s.Err
   101  			}
   102  
   103  			if _, err := buf.WriteString(fmt.Sprintf("%s: %s\n", s.Name, status)); err != nil {
   104  				response.Err = err.Error()
   105  				break
   106  			}
   107  		}
   108  		response.Data = buf
   109  	}
   110  
   111  	if err := writeResponse(w, r, response); err != nil {
   112  		log.Errorf("Error writing response: %v", err)
   113  	}
   114  }
   115  
   116  func (s *Service) goroutinezHandler(w http.ResponseWriter, _ *http.Request) {
   117  	stack := debug.Stack()
   118  	if _, err := w.Write(stack); err != nil {
   119  		log.WithError(err).Error("Failed to write goroutines stack")
   120  	}
   121  	if err := pprof.Lookup("goroutine").WriteTo(w, 2); err != nil {
   122  		log.WithError(err).Error("Failed to write pprof goroutines")
   123  	}
   124  }
   125  
   126  // Start the prometheus service.
   127  func (s *Service) Start() {
   128  	go func() {
   129  		// See if the port is already used.
   130  		conn, err := net.DialTimeout("tcp", s.server.Addr, time.Second)
   131  		if err == nil {
   132  			if err := conn.Close(); err != nil {
   133  				log.WithError(err).Error("Failed to close connection")
   134  			}
   135  			// Something on the port; we cannot use it.
   136  			log.WithField("address", s.server.Addr).Warn("Port already in use; cannot start prometheus service")
   137  		} else {
   138  			// Nothing on that port; we can use it.
   139  			log.WithField("address", s.server.Addr).Debug("Starting prometheus service")
   140  			err := s.server.ListenAndServe()
   141  			if err != nil && err != http.ErrServerClosed {
   142  				log.Errorf("Could not listen to host:port :%s: %v", s.server.Addr, err)
   143  				s.failStatus = err
   144  			}
   145  		}
   146  	}()
   147  }
   148  
   149  // Stop the service gracefully.
   150  func (s *Service) Stop() error {
   151  	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
   152  	defer cancel()
   153  	return s.server.Shutdown(ctx)
   154  }
   155  
   156  // Status checks for any service failure conditions.
   157  func (s *Service) Status() error {
   158  	if s.failStatus != nil {
   159  		return s.failStatus
   160  	}
   161  	return nil
   162  }