github.com/google/cadvisor@v0.49.1/resctrl/collector.go (about)

     1  //go:build linux
     2  // +build linux
     3  
     4  // Copyright 2021 Google Inc. All Rights Reserved.
     5  //
     6  // Licensed under the Apache License, Version 2.0 (the "License");
     7  // you may not use this file except in compliance with the License.
     8  // You may obtain a copy of the License at
     9  //
    10  //     http://www.apache.org/licenses/LICENSE-2.0
    11  //
    12  // Unless required by applicable law or agreed to in writing, software
    13  // distributed under the License is distributed on an "AS IS" BASIS,
    14  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    15  // See the License for the specific language governing permissions and
    16  // limitations under the License.
    17  
    18  // Collector of resctrl for a container.
    19  package resctrl
    20  
    21  import (
    22  	"fmt"
    23  	"os"
    24  	"path/filepath"
    25  	"strings"
    26  	"sync"
    27  	"time"
    28  
    29  	"k8s.io/klog/v2"
    30  
    31  	info "github.com/google/cadvisor/info/v1"
    32  )
    33  
// noInterval is the interval value meaning "no periodic checks": when a
// collector's interval equals it, setup does not start the background loop
// and reports a setup failure directly to the caller.
const noInterval = 0
    35  
// collector gathers resctrl statistics for a single container.
type collector struct {
	// id identifies the container; it is passed to prepareMonitoringGroup
	// and used in log messages.
	id                string
	// interval is the pause between background checks of the monitoring
	// group; noInterval disables the background loop.
	interval          time.Duration
	// getContainerPids is handed to prepareMonitoringGroup; presumably it
	// returns the PIDs that belong to the container.
	getContainerPids  func() ([]string, error)
	// resctrlPath is the monitoring group path returned by
	// prepareMonitoringGroup; stats are read from it and clear removes it.
	resctrlPath       string
	// running reports whether the monitoring group is currently set up;
	// UpdateStats collects nothing while it is false.
	running           bool
	// destroyed is set by Destroy and checked by the background loop.
	destroyed         bool
	// numberOfNUMANodes is used as the initial capacity of the per-node
	// stats slices built in UpdateStats.
	numberOfNUMANodes int
	// vendorID is passed through to getIntelRDTStatsFrom.
	vendorID          string
	// mu is held by UpdateStats, Destroy and each iteration of the
	// background loop started in setup.
	mu                sync.Mutex
	// inHostNamespace is passed through to prepareMonitoringGroup.
	inHostNamespace   bool
}
    48  
    49  func newCollector(id string, getContainerPids func() ([]string, error), interval time.Duration, numberOfNUMANodes int, vendorID string, inHostNamespace bool) *collector {
    50  	return &collector{id: id, interval: interval, getContainerPids: getContainerPids, numberOfNUMANodes: numberOfNUMANodes,
    51  		vendorID: vendorID, mu: sync.Mutex{}, inHostNamespace: inHostNamespace}
    52  }
    53  
    54  func (c *collector) setup() error {
    55  	var err error
    56  	c.resctrlPath, err = prepareMonitoringGroup(c.id, c.getContainerPids, c.inHostNamespace)
    57  
    58  	if c.interval != noInterval {
    59  		if err != nil {
    60  			klog.Errorf("Failed to setup container %q resctrl collector: %s \n Trying again in next intervals.", c.id, err)
    61  		} else {
    62  			c.running = true
    63  		}
    64  		go func() {
    65  			for {
    66  				time.Sleep(c.interval)
    67  				c.mu.Lock()
    68  				if c.destroyed {
    69  					break
    70  				}
    71  				klog.V(5).Infof("Trying to check %q containers control group.", c.id)
    72  				if c.running {
    73  					err = c.checkMonitoringGroup()
    74  					if err != nil {
    75  						c.running = false
    76  						klog.Errorf("Failed to check %q resctrl collector control group: %s \n Trying again in next intervals.", c.id, err)
    77  					}
    78  				} else {
    79  					c.resctrlPath, err = prepareMonitoringGroup(c.id, c.getContainerPids, c.inHostNamespace)
    80  					if err != nil {
    81  						c.running = false
    82  						klog.Errorf("Failed to setup container %q resctrl collector: %s \n Trying again in next intervals.", c.id, err)
    83  					}
    84  				}
    85  				c.mu.Unlock()
    86  			}
    87  		}()
    88  	} else {
    89  		// There is no interval set, if setup fail, stop.
    90  		if err != nil {
    91  			return fmt.Errorf("failed to setup container %q resctrl collector: %w", c.id, err)
    92  		}
    93  		c.running = true
    94  	}
    95  
    96  	return nil
    97  }
    98  
    99  func (c *collector) checkMonitoringGroup() error {
   100  	newPath, err := prepareMonitoringGroup(c.id, c.getContainerPids, c.inHostNamespace)
   101  	if err != nil {
   102  		return fmt.Errorf("couldn't obtain mon_group path: %v", err)
   103  	}
   104  
   105  	// Check if container moved between control groups.
   106  	if newPath != c.resctrlPath {
   107  		err = c.clear()
   108  		if err != nil {
   109  			return fmt.Errorf("couldn't clear previous monitoring group: %w", err)
   110  		}
   111  		c.resctrlPath = newPath
   112  	}
   113  
   114  	return nil
   115  }
   116  
   117  func (c *collector) UpdateStats(stats *info.ContainerStats) error {
   118  	c.mu.Lock()
   119  	defer c.mu.Unlock()
   120  	if c.running {
   121  		stats.Resctrl = info.ResctrlStats{}
   122  
   123  		resctrlStats, err := getIntelRDTStatsFrom(c.resctrlPath, c.vendorID)
   124  		if err != nil {
   125  			return err
   126  		}
   127  
   128  		stats.Resctrl.MemoryBandwidth = make([]info.MemoryBandwidthStats, 0, c.numberOfNUMANodes)
   129  		stats.Resctrl.Cache = make([]info.CacheStats, 0, c.numberOfNUMANodes)
   130  
   131  		for _, numaNodeStats := range *resctrlStats.MBMStats {
   132  			stats.Resctrl.MemoryBandwidth = append(stats.Resctrl.MemoryBandwidth,
   133  				info.MemoryBandwidthStats{
   134  					TotalBytes: numaNodeStats.MBMTotalBytes,
   135  					LocalBytes: numaNodeStats.MBMLocalBytes,
   136  				})
   137  		}
   138  
   139  		for _, numaNodeStats := range *resctrlStats.CMTStats {
   140  			stats.Resctrl.Cache = append(stats.Resctrl.Cache,
   141  				info.CacheStats{LLCOccupancy: numaNodeStats.LLCOccupancy})
   142  		}
   143  	}
   144  
   145  	return nil
   146  }
   147  
   148  func (c *collector) Destroy() {
   149  	c.mu.Lock()
   150  	defer c.mu.Unlock()
   151  	c.running = false
   152  	err := c.clear()
   153  	if err != nil {
   154  		klog.Errorf("trying to destroy %q resctrl collector but: %v", c.id, err)
   155  	}
   156  	c.destroyed = true
   157  }
   158  
   159  func (c *collector) clear() error {
   160  	// Not allowed to remove root or undefined resctrl directory.
   161  	if c.id != rootContainer && c.resctrlPath != "" {
   162  		// Remove only own prepared mon group.
   163  		if strings.HasPrefix(filepath.Base(c.resctrlPath), monGroupPrefix) {
   164  			err := os.RemoveAll(c.resctrlPath)
   165  			if err != nil {
   166  				return fmt.Errorf("couldn't clear mon_group: %v", err)
   167  			}
   168  		}
   169  	}
   170  	return nil
   171  }