github.com/google/cadvisor@v0.49.1/resctrl/collector.go (about) 1 //go:build linux 2 // +build linux 3 4 // Copyright 2021 Google Inc. All Rights Reserved. 5 // 6 // Licensed under the Apache License, Version 2.0 (the "License"); 7 // you may not use this file except in compliance with the License. 8 // You may obtain a copy of the License at 9 // 10 // http://www.apache.org/licenses/LICENSE-2.0 11 // 12 // Unless required by applicable law or agreed to in writing, software 13 // distributed under the License is distributed on an "AS IS" BASIS, 14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 // See the License for the specific language governing permissions and 16 // limitations under the License. 17 18 // Collector of resctrl for a container. 19 package resctrl 20 21 import ( 22 "fmt" 23 "os" 24 "path/filepath" 25 "strings" 26 "sync" 27 "time" 28 29 "k8s.io/klog/v2" 30 31 info "github.com/google/cadvisor/info/v1" 32 ) 33 34 const noInterval = 0 35 36 type collector struct { 37 id string 38 interval time.Duration 39 getContainerPids func() ([]string, error) 40 resctrlPath string 41 running bool 42 destroyed bool 43 numberOfNUMANodes int 44 vendorID string 45 mu sync.Mutex 46 inHostNamespace bool 47 } 48 49 func newCollector(id string, getContainerPids func() ([]string, error), interval time.Duration, numberOfNUMANodes int, vendorID string, inHostNamespace bool) *collector { 50 return &collector{id: id, interval: interval, getContainerPids: getContainerPids, numberOfNUMANodes: numberOfNUMANodes, 51 vendorID: vendorID, mu: sync.Mutex{}, inHostNamespace: inHostNamespace} 52 } 53 54 func (c *collector) setup() error { 55 var err error 56 c.resctrlPath, err = prepareMonitoringGroup(c.id, c.getContainerPids, c.inHostNamespace) 57 58 if c.interval != noInterval { 59 if err != nil { 60 klog.Errorf("Failed to setup container %q resctrl collector: %s \n Trying again in next intervals.", c.id, err) 61 } else { 62 c.running = true 63 } 64 go func() { 65 for { 66 time.Sleep(c.interval) 67 c.mu.Lock() 68 if c.destroyed { 69 break 70 } 71 klog.V(5).Infof("Trying to check %q containers control group.", c.id) 72 if c.running { 73 err = c.checkMonitoringGroup() 74 if err != nil { 75 c.running = false 76 klog.Errorf("Failed to check %q resctrl collector control group: %s \n Trying again in next intervals.", c.id, err) 77 } 78 } else { 79 c.resctrlPath, err = prepareMonitoringGroup(c.id, c.getContainerPids, c.inHostNamespace) 80 if err != nil { 81 c.running = false 82 klog.Errorf("Failed to setup container %q resctrl collector: %s \n Trying again in next intervals.", c.id, err) 83 } 84 } 85 c.mu.Unlock() 86 } 87 }() 88 } else { 89 // There is no interval set, if setup fail, stop. 90 if err != nil { 91 return fmt.Errorf("failed to setup container %q resctrl collector: %w", c.id, err) 92 } 93 c.running = true 94 } 95 96 return nil 97 } 98 99 func (c *collector) checkMonitoringGroup() error { 100 newPath, err := prepareMonitoringGroup(c.id, c.getContainerPids, c.inHostNamespace) 101 if err != nil { 102 return fmt.Errorf("couldn't obtain mon_group path: %v", err) 103 } 104 105 // Check if container moved between control groups. 106 if newPath != c.resctrlPath { 107 err = c.clear() 108 if err != nil { 109 return fmt.Errorf("couldn't clear previous monitoring group: %w", err) 110 } 111 c.resctrlPath = newPath 112 } 113 114 return nil 115 } 116 117 func (c *collector) UpdateStats(stats *info.ContainerStats) error { 118 c.mu.Lock() 119 defer c.mu.Unlock() 120 if c.running { 121 stats.Resctrl = info.ResctrlStats{} 122 123 resctrlStats, err := getIntelRDTStatsFrom(c.resctrlPath, c.vendorID) 124 if err != nil { 125 return err 126 } 127 128 stats.Resctrl.MemoryBandwidth = make([]info.MemoryBandwidthStats, 0, c.numberOfNUMANodes) 129 stats.Resctrl.Cache = make([]info.CacheStats, 0, c.numberOfNUMANodes) 130 131 for _, numaNodeStats := range *resctrlStats.MBMStats { 132 stats.Resctrl.MemoryBandwidth = append(stats.Resctrl.MemoryBandwidth, 133 info.MemoryBandwidthStats{ 134 TotalBytes: numaNodeStats.MBMTotalBytes, 135 LocalBytes: numaNodeStats.MBMLocalBytes, 136 }) 137 } 138 139 for _, numaNodeStats := range *resctrlStats.CMTStats { 140 stats.Resctrl.Cache = append(stats.Resctrl.Cache, 141 info.CacheStats{LLCOccupancy: numaNodeStats.LLCOccupancy}) 142 } 143 } 144 145 return nil 146 } 147 148 func (c *collector) Destroy() { 149 c.mu.Lock() 150 defer c.mu.Unlock() 151 c.running = false 152 err := c.clear() 153 if err != nil { 154 klog.Errorf("trying to destroy %q resctrl collector but: %v", c.id, err) 155 } 156 c.destroyed = true 157 } 158 159 func (c *collector) clear() error { 160 // Not allowed to remove root or undefined resctrl directory. 161 if c.id != rootContainer && c.resctrlPath != "" { 162 // Remove only own prepared mon group. 163 if strings.HasPrefix(filepath.Base(c.resctrlPath), monGroupPrefix) { 164 err := os.RemoveAll(c.resctrlPath) 165 if err != nil { 166 return fmt.Errorf("couldn't clear mon_group: %v", err) 167 } 168 } 169 } 170 return nil 171 }