github.com/demonoid81/containerd@v1.3.4/metrics/cgroups/metrics.go (about)

     1  // +build linux
     2  
     3  /*
     4     Copyright The containerd Authors.
     5  
     6     Licensed under the Apache License, Version 2.0 (the "License");
     7     you may not use this file except in compliance with the License.
     8     You may obtain a copy of the License at
     9  
    10         http://www.apache.org/licenses/LICENSE-2.0
    11  
    12     Unless required by applicable law or agreed to in writing, software
    13     distributed under the License is distributed on an "AS IS" BASIS,
    14     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    15     See the License for the specific language governing permissions and
    16     limitations under the License.
    17  */
    18  
    19  package cgroups
    20  
    21  import (
    22  	"context"
    23  	"fmt"
    24  	"sync"
    25  
    26  	"github.com/containerd/cgroups"
    27  	"github.com/containerd/containerd/log"
    28  	v1 "github.com/containerd/containerd/metrics/types/v1"
    29  	"github.com/containerd/containerd/namespaces"
    30  	"github.com/containerd/containerd/runtime"
    31  	"github.com/containerd/typeurl"
    32  	metrics "github.com/docker/go-metrics"
    33  	"github.com/prometheus/client_golang/prometheus"
    34  )
    35  
// Trigger will be called when an event happens and provides the cgroup
// where the event originated from.
//
// NOTE(review): the two string parameters are unnamed in this declaration —
// presumably the task id and its namespace; confirm against the call sites.
type Trigger func(string, string, cgroups.Cgroup)
    39  
    40  // newCollector registers the collector with the provided namespace and returns it so
    41  // that cgroups can be added for collection
    42  func newCollector(ns *metrics.Namespace) *collector {
    43  	if ns == nil {
    44  		return &collector{}
    45  	}
    46  	// add machine cpus and memory info
    47  	c := &collector{
    48  		ns:    ns,
    49  		tasks: make(map[string]runtime.Task),
    50  	}
    51  	c.metrics = append(c.metrics, pidMetrics...)
    52  	c.metrics = append(c.metrics, cpuMetrics...)
    53  	c.metrics = append(c.metrics, memoryMetrics...)
    54  	c.metrics = append(c.metrics, hugetlbMetrics...)
    55  	c.metrics = append(c.metrics, blkioMetrics...)
    56  	c.storedMetrics = make(chan prometheus.Metric, 100*len(c.metrics))
    57  	ns.Add(c)
    58  	return c
    59  }
    60  
    61  func taskID(id, namespace string) string {
    62  	return fmt.Sprintf("%s-%s", id, namespace)
    63  }
    64  
// collector provides the ability to collect container stats and export
// them in the prometheus format.
//
// Fields:
//   - mu guards tasks: Add and Remove take the write lock, Collect takes the
//     read lock while it walks the map.
//   - tasks holds the tasks to collect from, keyed by taskID(id, namespace).
//   - ns is the metrics namespace the collector was registered with; when it
//     is nil, Add and Remove are no-ops.
//   - metrics is the list of metric definitions that are described and
//     collected for every task.
//   - storedMetrics is a buffered channel whose contents Collect forwards to
//     the output channel on every call. NOTE(review): the producer side is not
//     in this file — presumably filled by non-blocking collection; confirm.
type collector struct {
	mu sync.RWMutex

	tasks         map[string]runtime.Task
	ns            *metrics.Namespace
	metrics       []*metric
	storedMetrics chan prometheus.Metric
}
    75  
    76  func (c *collector) Describe(ch chan<- *prometheus.Desc) {
    77  	for _, m := range c.metrics {
    78  		ch <- m.desc(c.ns)
    79  	}
    80  }
    81  
    82  func (c *collector) Collect(ch chan<- prometheus.Metric) {
    83  	c.mu.RLock()
    84  	wg := &sync.WaitGroup{}
    85  	for _, t := range c.tasks {
    86  		wg.Add(1)
    87  		go c.collect(t, ch, true, wg)
    88  	}
    89  storedLoop:
    90  	for {
    91  		// read stored metrics until the channel is flushed
    92  		select {
    93  		case m := <-c.storedMetrics:
    94  			ch <- m
    95  		default:
    96  			break storedLoop
    97  		}
    98  	}
    99  	c.mu.RUnlock()
   100  	wg.Wait()
   101  }
   102  
   103  func (c *collector) collect(t runtime.Task, ch chan<- prometheus.Metric, block bool, wg *sync.WaitGroup) {
   104  	if wg != nil {
   105  		defer wg.Done()
   106  	}
   107  	ctx := namespaces.WithNamespace(context.Background(), t.Namespace())
   108  	stats, err := t.Stats(ctx)
   109  	if err != nil {
   110  		log.L.WithError(err).Errorf("stat task %s", t.ID())
   111  		return
   112  	}
   113  	data, err := typeurl.UnmarshalAny(stats)
   114  	if err != nil {
   115  		log.L.WithError(err).Errorf("unmarshal stats for %s", t.ID())
   116  		return
   117  	}
   118  	s, ok := data.(*v1.Metrics)
   119  	if !ok {
   120  		log.L.WithError(err).Errorf("invalid metric type for %s", t.ID())
   121  		return
   122  	}
   123  	for _, m := range c.metrics {
   124  		m.collect(t.ID(), t.Namespace(), s, c.ns, ch, block)
   125  	}
   126  }
   127  
   128  // Add adds the provided cgroup and id so that metrics are collected and exported
   129  func (c *collector) Add(t runtime.Task) error {
   130  	if c.ns == nil {
   131  		return nil
   132  	}
   133  	c.mu.Lock()
   134  	defer c.mu.Unlock()
   135  	id := taskID(t.ID(), t.Namespace())
   136  	if _, ok := c.tasks[id]; ok {
   137  		return nil // requests to collect metrics should be idempotent
   138  	}
   139  	c.tasks[id] = t
   140  	return nil
   141  }
   142  
   143  // Remove removes the provided cgroup by id from the collector
   144  func (c *collector) Remove(t runtime.Task) {
   145  	if c.ns == nil {
   146  		return
   147  	}
   148  	c.mu.Lock()
   149  	defer c.mu.Unlock()
   150  	delete(c.tasks, taskID(t.ID(), t.Namespace()))
   151  }