github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/worker/raft/metrics.go (about)

     1  // Copyright 2018 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package raft
     5  
     6  import (
     7  	"github.com/armon/go-metrics"
     8  	pmetrics "github.com/armon/go-metrics/prometheus"
     9  	"github.com/juju/errors"
    10  	"github.com/prometheus/client_golang/prometheus"
    11  )
    12  
    13  // newMetricsCollector returns a collector for the metrics gathered in the
    14  // hashicorp/raft library code.
    15  func newMetricsCollector() (prometheus.Collector, error) {
    16  	sink, err := pmetrics.NewPrometheusSink()
    17  	if err != nil {
    18  		return nil, errors.Trace(err)
    19  	}
    20  	// go-metrics always registers the sink it returns in the default
    21  	// registry, which we don't collect metrics from - unregister it
    22  	// so subsequent calls don't fail because it's already registered
    23  	// there.
    24  	prometheus.DefaultRegisterer.Unregister(sink)
    25  	_, err = metrics.NewGlobal(metrics.DefaultConfig("juju"), sink)
    26  	if err != nil {
    27  		return nil, errors.Trace(err)
    28  	}
    29  	return sink, nil
    30  }
    31  
    32  func registerMetrics(registry prometheus.Registerer, logger Logger) {
    33  	collector, err := newMetricsCollector()
    34  	if err != nil {
    35  		// It isn't a fatal error to fail to set up metrics, so
    36  		// log and continue
    37  		logger.Warningf("creating a raft metrics collector failed: %v", err)
    38  		return
    39  	}
    40  
    41  	// We use unregister/register rather than
    42  	// register/defer-unregister to avoid this scenario:
    43  	// * raft.newWorker is called, which starts loop in a
    44  	//   goroutine.
    45  	// * loop registers the collector and defers unregistering.
    46  	// * loop gets delayed starting raft (possibly it's taking a
    47  	//   long time for the peergrouper to publish the api addresses).
    48  	// * newWorker times out waiting for loop to be ready, kills the
    49  	//   catacomb and returns a timeout error - at this point loop
    50  	//   hasn't finished, so the collector hasn't been unregistered.
    51  	// * The dep-engine calls newWorker again, it starts a new loop
    52  	//   goroutine.
    53  	// * The new run of loop can't register the collector, and we
    54  	//   never see raft metrics for this controller.
    55  	registry.Unregister(collector)
    56  	err = registry.Register(collector)
    57  	if err != nil {
    58  		logger.Warningf("registering metrics collector failed: %v", err)
    59  	}
    60  
    61  }