github.com/Heebron/moby@v0.0.0-20221111184709-6eab4f55faf7/daemon/stats/collector.go (about)

     1  package stats // import "github.com/docker/docker/daemon/stats"
     2  
     3  import (
     4  	"bufio"
     5  	"sync"
     6  	"time"
     7  
     8  	"github.com/docker/docker/api/types"
     9  	"github.com/docker/docker/container"
    10  	"github.com/docker/docker/errdefs"
    11  	"github.com/moby/pubsub"
    12  	"github.com/sirupsen/logrus"
    13  )
    14  
// Collector manages and provides container resource stats
type Collector struct {
	// m guards publishers; it is also the Locker behind cond.
	m sync.Mutex
	// cond is signalled by Collect so that Run, which waits on it while
	// publishers is empty, wakes up once there is something to collect.
	cond *sync.Cond
	// supervisor is queried for each registered container's stats.
	supervisor supervisor
	// interval is how long Run sleeps between collection rounds.
	interval time.Duration
	// publishers maps every container being collected to the publisher
	// its subscribers receive stats from.
	publishers map[*container.Container]*pubsub.Publisher
	// bufReader is a small reusable reader created by NewCollector.
	// NOTE(review): not referenced in this file; presumably used by the
	// platform-specific system CPU helpers — confirm.
	bufReader *bufio.Reader
}
    24  
    25  // NewCollector creates a stats collector that will poll the supervisor with the specified interval
    26  func NewCollector(supervisor supervisor, interval time.Duration) *Collector {
    27  	s := &Collector{
    28  		interval:   interval,
    29  		supervisor: supervisor,
    30  		publishers: make(map[*container.Container]*pubsub.Publisher),
    31  		bufReader:  bufio.NewReaderSize(nil, 128),
    32  	}
    33  	s.cond = sync.NewCond(&s.m)
    34  	return s
    35  }
    36  
// supervisor is the source the Collector polls for per-container stats.
type supervisor interface {
	// GetContainerStats collects all the stats related to a container.
	// Run treats errors matching errdefs.ErrConflict or errdefs.ErrNotFound
	// as "container not running or not found" and publishes empty stats
	// for them without logging.
	GetContainerStats(container *container.Container) (*types.StatsJSON, error)
}
    41  
    42  // Collect registers the container with the collector and adds it to
    43  // the event loop for collection on the specified interval returning
    44  // a channel for the subscriber to receive on.
    45  func (s *Collector) Collect(c *container.Container) chan interface{} {
    46  	s.cond.L.Lock()
    47  	defer s.cond.L.Unlock()
    48  
    49  	publisher, exists := s.publishers[c]
    50  	if !exists {
    51  		publisher = pubsub.NewPublisher(100*time.Millisecond, 1024)
    52  		s.publishers[c] = publisher
    53  	}
    54  
    55  	s.cond.Broadcast()
    56  	return publisher.Subscribe()
    57  }
    58  
    59  // StopCollection closes the channels for all subscribers and removes
    60  // the container from metrics collection.
    61  func (s *Collector) StopCollection(c *container.Container) {
    62  	s.m.Lock()
    63  	if publisher, exists := s.publishers[c]; exists {
    64  		publisher.Close()
    65  		delete(s.publishers, c)
    66  	}
    67  	s.m.Unlock()
    68  }
    69  
    70  // Unsubscribe removes a specific subscriber from receiving updates for a container's stats.
    71  func (s *Collector) Unsubscribe(c *container.Container, ch chan interface{}) {
    72  	s.m.Lock()
    73  	publisher := s.publishers[c]
    74  	if publisher != nil {
    75  		publisher.Evict(ch)
    76  		if publisher.Len() == 0 {
    77  			delete(s.publishers, c)
    78  		}
    79  	}
    80  	s.m.Unlock()
    81  }
    82  
    83  // Run starts the collectors and will indefinitely collect stats from the supervisor
    84  func (s *Collector) Run() {
    85  	type publishersPair struct {
    86  		container *container.Container
    87  		publisher *pubsub.Publisher
    88  	}
    89  	// we cannot determine the capacity here.
    90  	// it will grow enough in first iteration
    91  	var pairs []publishersPair
    92  
    93  	for {
    94  		s.cond.L.Lock()
    95  		for len(s.publishers) == 0 {
    96  			s.cond.Wait()
    97  		}
    98  
    99  		// it does not make sense in the first iteration,
   100  		// but saves allocations in further iterations
   101  		pairs = pairs[:0]
   102  
   103  		for container, publisher := range s.publishers {
   104  			// copy pointers here to release the lock ASAP
   105  			pairs = append(pairs, publishersPair{container, publisher})
   106  		}
   107  
   108  		s.cond.L.Unlock()
   109  
   110  		onlineCPUs, err := s.getNumberOnlineCPUs()
   111  		if err != nil {
   112  			logrus.Errorf("collecting system online cpu count: %v", err)
   113  			continue
   114  		}
   115  
   116  		for _, pair := range pairs {
   117  			stats, err := s.supervisor.GetContainerStats(pair.container)
   118  
   119  			switch err.(type) {
   120  			case nil:
   121  				// Sample system CPU usage close to container usage to avoid
   122  				// noise in metric calculations.
   123  				systemUsage, err := s.getSystemCPUUsage()
   124  				if err != nil {
   125  					logrus.WithError(err).WithField("container_id", pair.container.ID).Errorf("collecting system cpu usage")
   126  					continue
   127  				}
   128  
   129  				// FIXME: move to containerd on Linux (not Windows)
   130  				stats.CPUStats.SystemUsage = systemUsage
   131  				stats.CPUStats.OnlineCPUs = onlineCPUs
   132  
   133  				pair.publisher.Publish(*stats)
   134  
   135  			case errdefs.ErrConflict, errdefs.ErrNotFound:
   136  				// publish empty stats containing only name and ID if not running or not found
   137  				pair.publisher.Publish(types.StatsJSON{
   138  					Name: pair.container.Name,
   139  					ID:   pair.container.ID,
   140  				})
   141  
   142  			default:
   143  				logrus.Errorf("collecting stats for %s: %v", pair.container.ID, err)
   144  				pair.publisher.Publish(types.StatsJSON{
   145  					Name: pair.container.Name,
   146  					ID:   pair.container.ID,
   147  				})
   148  			}
   149  		}
   150  
   151  		time.Sleep(s.interval)
   152  	}
   153  }