github.com/docker/docker@v299999999.0.0-20200612211812-aaf470eca7b5+incompatible/daemon/metrics.go (about)

     1  package daemon // import "github.com/docker/docker/daemon"
     2  
     3  import (
     4  	"sync"
     5  
     6  	"github.com/docker/docker/errdefs"
     7  	"github.com/docker/docker/pkg/plugingetter"
     8  	"github.com/docker/docker/pkg/plugins"
     9  	metrics "github.com/docker/go-metrics"
    10  	"github.com/pkg/errors"
    11  	"github.com/prometheus/client_golang/prometheus"
    12  	"github.com/sirupsen/logrus"
    13  )
    14  
    15  const metricsPluginType = "MetricsCollector"
    16  
    17  var (
    18  	containerActions          metrics.LabeledTimer
    19  	networkActions            metrics.LabeledTimer
    20  	hostInfoFunctions         metrics.LabeledTimer
    21  	engineInfo                metrics.LabeledGauge
    22  	engineCpus                metrics.Gauge
    23  	engineMemory              metrics.Gauge
    24  	healthChecksCounter       metrics.Counter
    25  	healthChecksFailedCounter metrics.Counter
    26  
    27  	stateCtr *stateCounter
    28  )
    29  
    30  func init() {
    31  	ns := metrics.NewNamespace("engine", "daemon", nil)
    32  	containerActions = ns.NewLabeledTimer("container_actions", "The number of seconds it takes to process each container action", "action")
    33  	for _, a := range []string{
    34  		"start",
    35  		"changes",
    36  		"commit",
    37  		"create",
    38  		"delete",
    39  	} {
    40  		containerActions.WithValues(a).Update(0)
    41  	}
    42  	hostInfoFunctions = ns.NewLabeledTimer("host_info_functions", "The number of seconds it takes to call functions gathering info about the host", "function")
    43  
    44  	networkActions = ns.NewLabeledTimer("network_actions", "The number of seconds it takes to process each network action", "action")
    45  	engineInfo = ns.NewLabeledGauge("engine", "The information related to the engine and the OS it is running on", metrics.Unit("info"),
    46  		"version",
    47  		"commit",
    48  		"architecture",
    49  		"graphdriver",
    50  		"kernel",
    51  		"os",
    52  		"os_type",
    53  		"os_version",
    54  		"daemon_id", // ID is a randomly generated unique identifier (e.g. UUID4)
    55  	)
    56  	engineCpus = ns.NewGauge("engine_cpus", "The number of cpus that the host system of the engine has", metrics.Unit("cpus"))
    57  	engineMemory = ns.NewGauge("engine_memory", "The number of bytes of memory that the host system of the engine has", metrics.Bytes)
    58  	healthChecksCounter = ns.NewCounter("health_checks", "The total number of health checks")
    59  	healthChecksFailedCounter = ns.NewCounter("health_checks_failed", "The total number of failed health checks")
    60  
    61  	stateCtr = newStateCounter(ns.NewDesc("container_states", "The count of containers in various states", metrics.Unit("containers"), "state"))
    62  	ns.Add(stateCtr)
    63  
    64  	metrics.Register(ns)
    65  }
    66  
// stateCounter remembers the most recently recorded state label for each ID
// passed to set and reports how many IDs are in the "running", "paused" and
// "stopped" states through its Describe and Collect methods.
type stateCounter struct {
	// mu is held by get, set and del while they access states.
	mu     sync.Mutex
	// states maps an ID to the state label last recorded for it.
	states map[string]string
	// desc is the descriptor sent by Describe and used for every metric
	// produced by Collect.
	desc   *prometheus.Desc
}
    72  
    73  func newStateCounter(desc *prometheus.Desc) *stateCounter {
    74  	return &stateCounter{
    75  		states: make(map[string]string),
    76  		desc:   desc,
    77  	}
    78  }
    79  
    80  func (ctr *stateCounter) get() (running int, paused int, stopped int) {
    81  	ctr.mu.Lock()
    82  	defer ctr.mu.Unlock()
    83  
    84  	states := map[string]int{
    85  		"running": 0,
    86  		"paused":  0,
    87  		"stopped": 0,
    88  	}
    89  	for _, state := range ctr.states {
    90  		states[state]++
    91  	}
    92  	return states["running"], states["paused"], states["stopped"]
    93  }
    94  
    95  func (ctr *stateCounter) set(id, label string) {
    96  	ctr.mu.Lock()
    97  	ctr.states[id] = label
    98  	ctr.mu.Unlock()
    99  }
   100  
   101  func (ctr *stateCounter) del(id string) {
   102  	ctr.mu.Lock()
   103  	delete(ctr.states, id)
   104  	ctr.mu.Unlock()
   105  }
   106  
   107  func (ctr *stateCounter) Describe(ch chan<- *prometheus.Desc) {
   108  	ch <- ctr.desc
   109  }
   110  
   111  func (ctr *stateCounter) Collect(ch chan<- prometheus.Metric) {
   112  	running, paused, stopped := ctr.get()
   113  	ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(running), "running")
   114  	ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(paused), "paused")
   115  	ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(stopped), "stopped")
   116  }
   117  
   118  func (daemon *Daemon) cleanupMetricsPlugins() {
   119  	ls := daemon.PluginStore.GetAllManagedPluginsByCap(metricsPluginType)
   120  	var wg sync.WaitGroup
   121  	wg.Add(len(ls))
   122  
   123  	for _, plugin := range ls {
   124  		p := plugin
   125  		go func() {
   126  			defer wg.Done()
   127  
   128  			adapter, err := makePluginAdapter(p)
   129  			if err != nil {
   130  				logrus.WithError(err).WithField("plugin", p.Name()).Error("Error creating metrics plugin adapter")
   131  				return
   132  			}
   133  			if err := adapter.StopMetrics(); err != nil {
   134  				logrus.WithError(err).WithField("plugin", p.Name()).Error("Error stopping plugin metrics collection")
   135  			}
   136  		}()
   137  	}
   138  	wg.Wait()
   139  
   140  	if daemon.metricsPluginListener != nil {
   141  		daemon.metricsPluginListener.Close()
   142  	}
   143  }
   144  
// metricsPlugin is the set of operations the daemon invokes on a metrics
// plugin. makePluginAdapter returns values of this type.
type metricsPlugin interface {
	// StartMetrics asks the plugin to start metrics collection.
	StartMetrics() error
	// StopMetrics asks the plugin to stop metrics collection.
	StopMetrics() error
}
   149  
   150  func makePluginAdapter(p plugingetter.CompatPlugin) (metricsPlugin, error) {
   151  	if pc, ok := p.(plugingetter.PluginWithV1Client); ok {
   152  		return &metricsPluginAdapter{pc.Client(), p.Name()}, nil
   153  	}
   154  
   155  	pa, ok := p.(plugingetter.PluginAddr)
   156  	if !ok {
   157  		return nil, errdefs.System(errors.Errorf("got unknown plugin type %T", p))
   158  	}
   159  
   160  	if pa.Protocol() != plugins.ProtocolSchemeHTTPV1 {
   161  		return nil, errors.Errorf("plugin protocol not supported: %s", pa.Protocol())
   162  	}
   163  
   164  	addr := pa.Addr()
   165  	client, err := plugins.NewClientWithTimeout(addr.Network()+"://"+addr.String(), nil, pa.Timeout())
   166  	if err != nil {
   167  		return nil, errors.Wrap(err, "error creating metrics plugin client")
   168  	}
   169  	return &metricsPluginAdapter{client, p.Name()}, nil
   170  }
   171  
// metricsPluginAdapter implements metricsPlugin by issuing calls through a
// plugin client.
type metricsPluginAdapter struct {
	// c is the client used to call the plugin.
	c    *plugins.Client
	// name is the plugin's name as reported by its Name method.
	name string
}
   176  
   177  func (a *metricsPluginAdapter) StartMetrics() error {
   178  	type metricsPluginResponse struct {
   179  		Err string
   180  	}
   181  	var res metricsPluginResponse
   182  	if err := a.c.Call(metricsPluginType+".StartMetrics", nil, &res); err != nil {
   183  		return errors.Wrap(err, "could not start metrics plugin")
   184  	}
   185  	if res.Err != "" {
   186  		return errors.New(res.Err)
   187  	}
   188  	return nil
   189  }
   190  
   191  func (a *metricsPluginAdapter) StopMetrics() error {
   192  	if err := a.c.Call(metricsPluginType+".StopMetrics", nil, nil); err != nil {
   193  		return errors.Wrap(err, "error stopping metrics collector")
   194  	}
   195  	return nil
   196  }