github.com/docker/docker@v299999999.0.0-20200612211812-aaf470eca7b5+incompatible/daemon/stats/collector.go (about)

     1  package stats // import "github.com/docker/docker/daemon/stats"
     2  
     3  import (
     4  	"bufio"
     5  	"sync"
     6  	"time"
     7  
     8  	"github.com/docker/docker/api/types"
     9  	"github.com/docker/docker/container"
    10  	"github.com/docker/docker/pkg/pubsub"
    11  	"github.com/sirupsen/logrus"
    12  )
    13  
// Collector manages and provides container resource stats.
//
// Containers are registered with Collect, after which Run periodically polls
// the supervisor and publishes the resulting stats to every subscriber of
// that container.
type Collector struct {
	// m guards publishers; it is also the Locker behind cond.
	m sync.Mutex
	// cond is broadcast by Collect and waited on by Run while no
	// publishers are registered.
	cond *sync.Cond
	// supervisor is the source of per-container stats.
	supervisor supervisor
	// interval is how long Run sleeps between collection rounds.
	interval time.Duration
	// publishers maps each monitored container to the publisher that fans
	// its stats out to subscribers.
	publishers map[*container.Container]*pubsub.Publisher
	// bufReader is allocated by NewCollector with a 128-byte buffer and no
	// underlying reader. It is not used in this file.
	// NOTE(review): presumably reused by platform-specific code — confirm.
	bufReader *bufio.Reader

	// The following fields are not set on Windows currently.
	clockTicksPerSecond uint64
}
    26  
    27  // NewCollector creates a stats collector that will poll the supervisor with the specified interval
    28  func NewCollector(supervisor supervisor, interval time.Duration) *Collector {
    29  	s := &Collector{
    30  		interval:   interval,
    31  		supervisor: supervisor,
    32  		publishers: make(map[*container.Container]*pubsub.Publisher),
    33  		bufReader:  bufio.NewReaderSize(nil, 128),
    34  	}
    35  	s.cond = sync.NewCond(&s.m)
    36  
    37  	platformNewStatsCollector(s)
    38  
    39  	return s
    40  }
    41  
// supervisor is the source of container stats that the Collector polls
// from its Run loop.
type supervisor interface {
	// GetContainerStats collects all the stats related to a container.
	// Run publishes the returned stats when the error is nil; for any
	// non-nil error it publishes stats carrying only the container's
	// name and ID.
	GetContainerStats(container *container.Container) (*types.StatsJSON, error)
}
    46  
    47  // Collect registers the container with the collector and adds it to
    48  // the event loop for collection on the specified interval returning
    49  // a channel for the subscriber to receive on.
    50  func (s *Collector) Collect(c *container.Container) chan interface{} {
    51  	s.cond.L.Lock()
    52  	defer s.cond.L.Unlock()
    53  
    54  	publisher, exists := s.publishers[c]
    55  	if !exists {
    56  		publisher = pubsub.NewPublisher(100*time.Millisecond, 1024)
    57  		s.publishers[c] = publisher
    58  	}
    59  
    60  	s.cond.Broadcast()
    61  	return publisher.Subscribe()
    62  }
    63  
    64  // StopCollection closes the channels for all subscribers and removes
    65  // the container from metrics collection.
    66  func (s *Collector) StopCollection(c *container.Container) {
    67  	s.m.Lock()
    68  	if publisher, exists := s.publishers[c]; exists {
    69  		publisher.Close()
    70  		delete(s.publishers, c)
    71  	}
    72  	s.m.Unlock()
    73  }
    74  
    75  // Unsubscribe removes a specific subscriber from receiving updates for a container's stats.
    76  func (s *Collector) Unsubscribe(c *container.Container, ch chan interface{}) {
    77  	s.m.Lock()
    78  	publisher := s.publishers[c]
    79  	if publisher != nil {
    80  		publisher.Evict(ch)
    81  		if publisher.Len() == 0 {
    82  			delete(s.publishers, c)
    83  		}
    84  	}
    85  	s.m.Unlock()
    86  }
    87  
    88  // Run starts the collectors and will indefinitely collect stats from the supervisor
    89  func (s *Collector) Run() {
    90  	type publishersPair struct {
    91  		container *container.Container
    92  		publisher *pubsub.Publisher
    93  	}
    94  	// we cannot determine the capacity here.
    95  	// it will grow enough in first iteration
    96  	var pairs []publishersPair
    97  
    98  	for {
    99  		s.cond.L.Lock()
   100  		for len(s.publishers) == 0 {
   101  			s.cond.Wait()
   102  		}
   103  
   104  		// it does not make sense in the first iteration,
   105  		// but saves allocations in further iterations
   106  		pairs = pairs[:0]
   107  
   108  		for container, publisher := range s.publishers {
   109  			// copy pointers here to release the lock ASAP
   110  			pairs = append(pairs, publishersPair{container, publisher})
   111  		}
   112  
   113  		s.cond.L.Unlock()
   114  
   115  		onlineCPUs, err := s.getNumberOnlineCPUs()
   116  		if err != nil {
   117  			logrus.Errorf("collecting system online cpu count: %v", err)
   118  			continue
   119  		}
   120  
   121  		for _, pair := range pairs {
   122  			stats, err := s.supervisor.GetContainerStats(pair.container)
   123  
   124  			switch err.(type) {
   125  			case nil:
   126  				// Sample system CPU usage close to container usage to avoid
   127  				// noise in metric calculations.
   128  				systemUsage, err := s.getSystemCPUUsage()
   129  				if err != nil {
   130  					logrus.WithError(err).WithField("container_id", pair.container.ID).Errorf("collecting system cpu usage")
   131  					continue
   132  				}
   133  
   134  				// FIXME: move to containerd on Linux (not Windows)
   135  				stats.CPUStats.SystemUsage = systemUsage
   136  				stats.CPUStats.OnlineCPUs = onlineCPUs
   137  
   138  				pair.publisher.Publish(*stats)
   139  
   140  			case notRunningErr, notFoundErr:
   141  				// publish empty stats containing only name and ID if not running or not found
   142  				pair.publisher.Publish(types.StatsJSON{
   143  					Name: pair.container.Name,
   144  					ID:   pair.container.ID,
   145  				})
   146  
   147  			default:
   148  				logrus.Errorf("collecting stats for %s: %v", pair.container.ID, err)
   149  				pair.publisher.Publish(types.StatsJSON{
   150  					Name: pair.container.Name,
   151  					ID:   pair.container.ID,
   152  				})
   153  			}
   154  		}
   155  
   156  		time.Sleep(s.interval)
   157  	}
   158  }
   159  
// notRunningErr matches any error that also has a Conflict method. Run
// checks for it in its type switch and, on a match, publishes stats that
// contain only the container's name and ID.
type notRunningErr interface {
	error
	Conflict()
}
   164  
// notFoundErr matches any error that also has a NotFound method. Run
// checks for it in its type switch and, on a match, publishes stats that
// contain only the container's name and ID.
type notFoundErr interface {
	error
	NotFound()
}