gopkg.in/docker/docker.v23@v23.0.11/daemon/metrics.go (about)

     1  package daemon // import "github.com/docker/docker/daemon"
     2  
     3  import (
     4  	"sync"
     5  
     6  	"github.com/docker/docker/errdefs"
     7  	"github.com/docker/docker/pkg/plugingetter"
     8  	"github.com/docker/docker/pkg/plugins"
     9  	metrics "github.com/docker/go-metrics"
    10  	"github.com/pkg/errors"
    11  	"github.com/prometheus/client_golang/prometheus"
    12  	"github.com/sirupsen/logrus"
    13  )
    14  
// metricsPluginType is the plugin capability name used to look up metrics
// plugins in the plugin store and to prefix the RPC method names called on
// them (e.g. metricsPluginType+".StartMetrics").
const metricsPluginType = "MetricsCollector"
    16  
// Package-level metrics for the daemon. All of them are created from
// metricsNS, which init registers with the go-metrics package.
var (
	// metricsNS is the namespace every metric below is created from.
	metricsNS = metrics.NewNamespace("engine", "daemon", nil)

	// Timers keyed by a single label ("action" or "function") naming what was timed.
	containerActions  = metricsNS.NewLabeledTimer("container_actions", "The number of seconds it takes to process each container action", "action")
	networkActions    = metricsNS.NewLabeledTimer("network_actions", "The number of seconds it takes to process each network action", "action")
	hostInfoFunctions = metricsNS.NewLabeledTimer("host_info_functions", "The number of seconds it takes to call functions gathering info about the host", "function")

	// engineInfo is a gauge whose labels carry details about the engine and its host OS.
	engineInfo = metricsNS.NewLabeledGauge("engine", "The information related to the engine and the OS it is running on", metrics.Unit("info"),
		"version",
		"commit",
		"architecture",
		"graphdriver",
		"kernel",
		"os",
		"os_type",
		"os_version",
		"daemon_id", // ID is a randomly generated unique identifier (e.g. UUID4)
	)
	// Host resource gauges: CPU count and memory size in bytes.
	engineCpus   = metricsNS.NewGauge("engine_cpus", "The number of cpus that the host system of the engine has", metrics.Unit("cpus"))
	engineMemory = metricsNS.NewGauge("engine_memory", "The number of bytes of memory that the host system of the engine has", metrics.Bytes)

	// Health check counters and the timer for health check preparation.
	healthChecksCounter       = metricsNS.NewCounter("health_checks", "The total number of health checks")
	healthChecksFailedCounter = metricsNS.NewCounter("health_checks_failed", "The total number of failed health checks")
	healthCheckStartDuration  = metricsNS.NewTimer("health_check_start_duration", "The number of seconds it takes to prepare to run health checks")

	// stateCtr counts containers per state; newStateCounter adds it to metricsNS.
	stateCtr = newStateCounter(metricsNS, metricsNS.NewDesc("container_states", "The count of containers in various states", metrics.Unit("containers"), "state"))
)
    44  
    45  func init() {
    46  	for _, a := range []string{
    47  		"start",
    48  		"changes",
    49  		"commit",
    50  		"create",
    51  		"delete",
    52  	} {
    53  		containerActions.WithValues(a).Update(0)
    54  	}
    55  
    56  	metrics.Register(metricsNS)
    57  }
    58  
// stateCounter remembers the most recent state label recorded for each ID and
// reports per-state totals through its Describe and Collect methods.
type stateCounter struct {
	mu     sync.RWMutex      // guards states
	states map[string]string // ID -> state label, as stored by set and removed by del
	desc   *prometheus.Desc  // descriptor sent by Describe and used for every metric built in Collect
}
    64  
    65  func newStateCounter(ns *metrics.Namespace, desc *prometheus.Desc) *stateCounter {
    66  	c := &stateCounter{
    67  		states: make(map[string]string),
    68  		desc:   desc,
    69  	}
    70  	ns.Add(c)
    71  	return c
    72  }
    73  
    74  func (ctr *stateCounter) get() (running int, paused int, stopped int) {
    75  	ctr.mu.RLock()
    76  	defer ctr.mu.RUnlock()
    77  
    78  	states := map[string]int{
    79  		"running": 0,
    80  		"paused":  0,
    81  		"stopped": 0,
    82  	}
    83  	for _, state := range ctr.states {
    84  		states[state]++
    85  	}
    86  	return states["running"], states["paused"], states["stopped"]
    87  }
    88  
    89  func (ctr *stateCounter) set(id, label string) {
    90  	ctr.mu.Lock()
    91  	ctr.states[id] = label
    92  	ctr.mu.Unlock()
    93  }
    94  
    95  func (ctr *stateCounter) del(id string) {
    96  	ctr.mu.Lock()
    97  	delete(ctr.states, id)
    98  	ctr.mu.Unlock()
    99  }
   100  
   101  func (ctr *stateCounter) Describe(ch chan<- *prometheus.Desc) {
   102  	ch <- ctr.desc
   103  }
   104  
   105  func (ctr *stateCounter) Collect(ch chan<- prometheus.Metric) {
   106  	running, paused, stopped := ctr.get()
   107  	ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(running), "running")
   108  	ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(paused), "paused")
   109  	ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(stopped), "stopped")
   110  }
   111  
   112  func (daemon *Daemon) cleanupMetricsPlugins() {
   113  	ls := daemon.PluginStore.GetAllManagedPluginsByCap(metricsPluginType)
   114  	var wg sync.WaitGroup
   115  	wg.Add(len(ls))
   116  
   117  	for _, plugin := range ls {
   118  		p := plugin
   119  		go func() {
   120  			defer wg.Done()
   121  
   122  			adapter, err := makePluginAdapter(p)
   123  			if err != nil {
   124  				logrus.WithError(err).WithField("plugin", p.Name()).Error("Error creating metrics plugin adapter")
   125  				return
   126  			}
   127  			if err := adapter.StopMetrics(); err != nil {
   128  				logrus.WithError(err).WithField("plugin", p.Name()).Error("Error stopping plugin metrics collection")
   129  			}
   130  		}()
   131  	}
   132  	wg.Wait()
   133  
   134  	if daemon.metricsPluginListener != nil {
   135  		daemon.metricsPluginListener.Close()
   136  	}
   137  }
   138  
// metricsPlugin is the set of operations the daemon performs on a metrics
// plugin. Values are obtained from makePluginAdapter.
type metricsPlugin interface {
	// StartMetrics asks the plugin to start metrics collection.
	StartMetrics() error
	// StopMetrics asks the plugin to stop metrics collection.
	StopMetrics() error
}
   143  
   144  func makePluginAdapter(p plugingetter.CompatPlugin) (metricsPlugin, error) {
   145  	if pc, ok := p.(plugingetter.PluginWithV1Client); ok {
   146  		return &metricsPluginAdapter{pc.Client(), p.Name()}, nil
   147  	}
   148  
   149  	pa, ok := p.(plugingetter.PluginAddr)
   150  	if !ok {
   151  		return nil, errdefs.System(errors.Errorf("got unknown plugin type %T", p))
   152  	}
   153  
   154  	if pa.Protocol() != plugins.ProtocolSchemeHTTPV1 {
   155  		return nil, errors.Errorf("plugin protocol not supported: %s", pa.Protocol())
   156  	}
   157  
   158  	addr := pa.Addr()
   159  	client, err := plugins.NewClientWithTimeout(addr.Network()+"://"+addr.String(), nil, pa.Timeout())
   160  	if err != nil {
   161  		return nil, errors.Wrap(err, "error creating metrics plugin client")
   162  	}
   163  	return &metricsPluginAdapter{client, p.Name()}, nil
   164  }
   165  
// metricsPluginAdapter implements metricsPlugin by issuing RPC calls through
// a plugin client.
type metricsPluginAdapter struct {
	c    *plugins.Client // client used for the StartMetrics/StopMetrics calls
	name string          // plugin name, as returned by the plugin's Name method
}
   170  
   171  func (a *metricsPluginAdapter) StartMetrics() error {
   172  	type metricsPluginResponse struct {
   173  		Err string
   174  	}
   175  	var res metricsPluginResponse
   176  	if err := a.c.Call(metricsPluginType+".StartMetrics", nil, &res); err != nil {
   177  		return errors.Wrap(err, "could not start metrics plugin")
   178  	}
   179  	if res.Err != "" {
   180  		return errors.New(res.Err)
   181  	}
   182  	return nil
   183  }
   184  
   185  func (a *metricsPluginAdapter) StopMetrics() error {
   186  	if err := a.c.Call(metricsPluginType+".StopMetrics", nil, nil); err != nil {
   187  		return errors.Wrap(err, "error stopping metrics collector")
   188  	}
   189  	return nil
   190  }