github.com/moby/docker@v26.1.3+incompatible/daemon/metrics.go (about)

     1  package daemon // import "github.com/docker/docker/daemon"
     2  
     3  import (
     4  	"context"
     5  	"sync"
     6  
     7  	"github.com/containerd/log"
     8  	"github.com/docker/docker/errdefs"
     9  	"github.com/docker/docker/pkg/plugingetter"
    10  	"github.com/docker/docker/pkg/plugins"
    11  	metrics "github.com/docker/go-metrics"
    12  	"github.com/pkg/errors"
    13  	"github.com/prometheus/client_golang/prometheus"
    14  )
    15  
// metricsPluginType is the plugin capability name used to look up metrics
// plugins in the plugin store; it is also the prefix of the RPC method names
// invoked on those plugins (e.g. metricsPluginType+".StartMetrics").
const metricsPluginType = "MetricsCollector"
    17  
// Package-level metrics. Every collector below is created from metricsNS,
// which init registers with the go-metrics package.
var (
	metricsNS = metrics.NewNamespace("engine", "daemon", nil)

	// Labeled timers: the trailing argument names the label that identifies
	// the individual action (or function) being timed.
	containerActions  = metricsNS.NewLabeledTimer("container_actions", "The number of seconds it takes to process each container action", "action")
	networkActions    = metricsNS.NewLabeledTimer("network_actions", "The number of seconds it takes to process each network action", "action")
	hostInfoFunctions = metricsNS.NewLabeledTimer("host_info_functions", "The number of seconds it takes to call functions gathering info about the host", "function")

	// engineInfo is a labeled gauge; its labels carry the descriptive
	// information about the engine and the host it runs on.
	engineInfo = metricsNS.NewLabeledGauge("engine", "The information related to the engine and the OS it is running on", metrics.Unit("info"),
		"version",
		"commit",
		"architecture",
		"graphdriver",
		"kernel",
		"os",
		"os_type",
		"os_version",
		"daemon_id", // ID is a randomly generated unique identifier (e.g. UUID4)
	)
	// Host resource gauges.
	engineCpus   = metricsNS.NewGauge("engine_cpus", "The number of cpus that the host system of the engine has", metrics.Unit("cpus"))
	engineMemory = metricsNS.NewGauge("engine_memory", "The number of bytes of memory that the host system of the engine has", metrics.Bytes)

	// Health check counters and timer.
	healthChecksCounter       = metricsNS.NewCounter("health_checks", "The total number of health checks")
	healthChecksFailedCounter = metricsNS.NewCounter("health_checks_failed", "The total number of failed health checks")
	healthCheckStartDuration  = metricsNS.NewTimer("health_check_start_duration", "The number of seconds it takes to prepare to run health checks")

	// stateCtr is a custom collector (see stateCounter) that newStateCounter
	// adds to metricsNS; it reports one gauge value per state label.
	stateCtr = newStateCounter(metricsNS, metricsNS.NewDesc("container_states", "The count of containers in various states", metrics.Unit("containers"), "state"))
)
    45  
    46  func init() {
    47  	for _, a := range []string{
    48  		"start",
    49  		"changes",
    50  		"commit",
    51  		"create",
    52  		"delete",
    53  	} {
    54  		containerActions.WithValues(a).Update(0)
    55  	}
    56  
    57  	metrics.Register(metricsNS)
    58  }
    59  
// stateCounter remembers the latest state label recorded for each id and
// exposes the per-state totals through its Describe and Collect methods.
type stateCounter struct {
	mu     sync.RWMutex      // guards states
	states map[string]string // id -> state label as passed to set (presumably container IDs; verify against callers)
	desc   *prometheus.Desc  // descriptor sent by Describe and used for every metric built in Collect
}
    65  
    66  func newStateCounter(ns *metrics.Namespace, desc *prometheus.Desc) *stateCounter {
    67  	c := &stateCounter{
    68  		states: make(map[string]string),
    69  		desc:   desc,
    70  	}
    71  	ns.Add(c)
    72  	return c
    73  }
    74  
    75  func (ctr *stateCounter) get() (running int, paused int, stopped int) {
    76  	ctr.mu.RLock()
    77  	defer ctr.mu.RUnlock()
    78  
    79  	states := map[string]int{
    80  		"running": 0,
    81  		"paused":  0,
    82  		"stopped": 0,
    83  	}
    84  	for _, state := range ctr.states {
    85  		states[state]++
    86  	}
    87  	return states["running"], states["paused"], states["stopped"]
    88  }
    89  
    90  func (ctr *stateCounter) set(id, label string) {
    91  	ctr.mu.Lock()
    92  	ctr.states[id] = label
    93  	ctr.mu.Unlock()
    94  }
    95  
    96  func (ctr *stateCounter) del(id string) {
    97  	ctr.mu.Lock()
    98  	delete(ctr.states, id)
    99  	ctr.mu.Unlock()
   100  }
   101  
   102  func (ctr *stateCounter) Describe(ch chan<- *prometheus.Desc) {
   103  	ch <- ctr.desc
   104  }
   105  
   106  func (ctr *stateCounter) Collect(ch chan<- prometheus.Metric) {
   107  	running, paused, stopped := ctr.get()
   108  	ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(running), "running")
   109  	ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(paused), "paused")
   110  	ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(stopped), "stopped")
   111  }
   112  
   113  func (daemon *Daemon) cleanupMetricsPlugins() {
   114  	ls := daemon.PluginStore.GetAllManagedPluginsByCap(metricsPluginType)
   115  	var wg sync.WaitGroup
   116  	wg.Add(len(ls))
   117  
   118  	for _, plugin := range ls {
   119  		p := plugin
   120  		go func() {
   121  			defer wg.Done()
   122  
   123  			adapter, err := makePluginAdapter(p)
   124  			if err != nil {
   125  				log.G(context.TODO()).WithError(err).WithField("plugin", p.Name()).Error("Error creating metrics plugin adapter")
   126  				return
   127  			}
   128  			if err := adapter.StopMetrics(); err != nil {
   129  				log.G(context.TODO()).WithError(err).WithField("plugin", p.Name()).Error("Error stopping plugin metrics collection")
   130  			}
   131  		}()
   132  	}
   133  	wg.Wait()
   134  
   135  	if daemon.metricsPluginListener != nil {
   136  		daemon.metricsPluginListener.Close()
   137  	}
   138  }
   139  
// metricsPlugin is the control surface the daemon uses to drive a metrics
// plugin; makePluginAdapter returns an implementation of it.
type metricsPlugin interface {
	// StartMetrics asks the plugin to start metrics collection.
	StartMetrics() error
	// StopMetrics asks the plugin to stop metrics collection.
	StopMetrics() error
}
   144  
   145  func makePluginAdapter(p plugingetter.CompatPlugin) (metricsPlugin, error) {
   146  	if pc, ok := p.(plugingetter.PluginWithV1Client); ok {
   147  		return &metricsPluginAdapter{pc.Client(), p.Name()}, nil
   148  	}
   149  
   150  	pa, ok := p.(plugingetter.PluginAddr)
   151  	if !ok {
   152  		return nil, errdefs.System(errors.Errorf("got unknown plugin type %T", p))
   153  	}
   154  
   155  	if pa.Protocol() != plugins.ProtocolSchemeHTTPV1 {
   156  		return nil, errors.Errorf("plugin protocol not supported: %s", pa.Protocol())
   157  	}
   158  
   159  	addr := pa.Addr()
   160  	client, err := plugins.NewClientWithTimeout(addr.Network()+"://"+addr.String(), nil, pa.Timeout())
   161  	if err != nil {
   162  		return nil, errors.Wrap(err, "error creating metrics plugin client")
   163  	}
   164  	return &metricsPluginAdapter{client, p.Name()}, nil
   165  }
   166  
// metricsPluginAdapter implements metricsPlugin by issuing calls through a
// plugin client. Field order matters: makePluginAdapter builds values with
// positional composite literals.
type metricsPluginAdapter struct {
	c    *plugins.Client // client used for the StartMetrics/StopMetrics calls
	name string          // plugin name as reported by p.Name() in makePluginAdapter
}
   171  
   172  func (a *metricsPluginAdapter) StartMetrics() error {
   173  	type metricsPluginResponse struct {
   174  		Err string
   175  	}
   176  	var res metricsPluginResponse
   177  	if err := a.c.Call(metricsPluginType+".StartMetrics", nil, &res); err != nil {
   178  		return errors.Wrap(err, "could not start metrics plugin")
   179  	}
   180  	if res.Err != "" {
   181  		return errors.New(res.Err)
   182  	}
   183  	return nil
   184  }
   185  
   186  func (a *metricsPluginAdapter) StopMetrics() error {
   187  	if err := a.c.Call(metricsPluginType+".StopMetrics", nil, nil); err != nil {
   188  		return errors.Wrap(err, "error stopping metrics collector")
   189  	}
   190  	return nil
   191  }