github.com/looshlee/beatles@v0.0.0-20220727174639-742810ab631c/pkg/maps/metricsmap/metricsmap.go (about) 1 // Copyright 2016-2019 Authors of Cilium 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package metricsmap 16 17 import ( 18 "context" 19 "fmt" 20 "io" 21 "io/ioutil" 22 "os" 23 "strconv" 24 "strings" 25 "unsafe" 26 27 "github.com/cilium/cilium/pkg/bpf" 28 "github.com/cilium/cilium/pkg/logging" 29 "github.com/cilium/cilium/pkg/logging/logfields" 30 "github.com/cilium/cilium/pkg/metrics" 31 monitorAPI "github.com/cilium/cilium/pkg/monitor/api" 32 33 "github.com/prometheus/client_golang/prometheus" 34 ) 35 36 var ( 37 // Metrics is the bpf metrics map 38 Metrics *bpf.Map 39 log = logging.DefaultLogger.WithField(logfields.LogSubsys, "map-metrics") 40 possibleCpus int 41 ) 42 43 const ( 44 // MapName for metrics map. 45 MapName = "cilium_metrics" 46 // MaxEntries is the maximum number of keys that can be present in the 47 // Metrics Map. 48 MaxEntries = 65536 49 // dirIngress and dirEgress values should match with 50 // METRIC_INGRESS and METRIC_EGRESS in bpf/lib/common.h 51 dirIngress = 1 52 dirEgress = 2 53 dirUnknown = 0 54 55 possibleCPUSysfsPath = "/sys/devices/system/cpu/possible" 56 ) 57 58 // direction is the metrics direction i.e ingress (to an endpoint) 59 // or egress (from an endpoint). If it's none of the above, we return 60 // UNKNOWN direction. 61 var direction = map[uint8]string{ 62 0: "UNKNOWN", 63 1: "INGRESS", 64 2: "EGRESS", 65 } 66 67 type pad3uint16 [3]uint16 68 69 // DeepCopyInto is a deepcopy function, copying the receiver, writing into out. in must be non-nil. 70 func (in *pad3uint16) DeepCopyInto(out *pad3uint16) { 71 copy(out[:], in[:]) 72 return 73 } 74 75 // Key must be in sync with struct metrics_key in <bpf/lib/common.h> 76 // +k8s:deepcopy-gen=true 77 // +k8s:deepcopy-gen:interfaces=github.com/cilium/cilium/pkg/bpf.MapKey 78 type Key struct { 79 Reason uint8 `align:"reason"` 80 Dir uint8 `align:"dir"` 81 Reserved pad3uint16 `align:"reserved"` 82 } 83 84 // Value must be in sync with struct metrics_value in <bpf/lib/common.h> 85 // +k8s:deepcopy-gen=true 86 // +k8s:deepcopy-gen:interfaces=github.com/cilium/cilium/pkg/bpf.MapValue 87 type Value struct { 88 Count uint64 `align:"count"` 89 Bytes uint64 `align:"bytes"` 90 } 91 92 // +k8s:deepcopy-gen=true 93 // +k8s:deepcopy-gen:interfaces=github.com/cilium/cilium/pkg/bpf.MapValue 94 // Values is a slice of Values 95 type Values []Value 96 97 // DeepCopyMapValue is an autogenerated deepcopy function, copying the receiver, creating a new bpf.MapValue. 98 func (vs *Values) DeepCopyMapValue() bpf.MapValue { 99 if c := vs.DeepCopy(); c != nil { 100 return &c 101 } 102 return nil 103 } 104 105 // String converts the value into a human readable string format 106 func (vs Values) String() string { 107 sumCount, sumBytes := uint64(0), uint64(0) 108 for _, v := range vs { 109 sumCount += v.Count 110 sumBytes += v.Bytes 111 } 112 return fmt.Sprintf("count:%d bytes:%d", sumCount, sumBytes) 113 } 114 115 // GetValuePtr returns the unsafe pointer to the BPF value. 116 func (vs *Values) GetValuePtr() unsafe.Pointer { 117 return unsafe.Pointer(vs) 118 } 119 120 // String converts the key into a human readable string format 121 func (k *Key) String() string { 122 return fmt.Sprintf("reason:%d dir:%d", k.Reason, k.Dir) 123 } 124 125 // MetricDirection gets the direction in human readable string format 126 func MetricDirection(dir uint8) string { 127 switch dir { 128 case dirIngress: 129 return direction[dir] 130 case dirEgress: 131 return direction[dir] 132 } 133 return direction[dirUnknown] 134 } 135 136 // Direction gets the direction in human readable string format 137 func (k *Key) Direction() string { 138 return MetricDirection(k.Dir) 139 } 140 141 // DropForwardReason gets the forwarded/dropped reason in human readable string format 142 func (k *Key) DropForwardReason() string { 143 return monitorAPI.DropReason(k.Reason) 144 } 145 146 // GetKeyPtr returns the unsafe pointer to the BPF key 147 func (k *Key) GetKeyPtr() unsafe.Pointer { return unsafe.Pointer(k) } 148 149 // String converts the value into a human readable string format 150 func (v *Value) String() string { 151 return fmt.Sprintf("count:%d bytes:%d", v.Count, v.Bytes) 152 } 153 154 // RequestCount returns the drop/forward count in a human readable string format 155 func (v *Value) RequestCount() string { 156 return strconv.FormatUint(v.Count, 10) 157 } 158 159 // RequestBytes returns drop/forward bytes in a human readable string format 160 func (v *Value) RequestBytes() string { 161 return strconv.FormatUint(v.Bytes, 10) 162 } 163 164 // IsDrop checks if the reason is drop or not. 165 func (k *Key) IsDrop() bool { 166 return k.Reason == monitorAPI.DropInvalid || k.Reason >= monitorAPI.DropMin 167 } 168 169 // CountFloat converts the request count to float 170 func (v *Value) CountFloat() float64 { 171 return float64(v.Count) 172 } 173 174 // bytesFloat converts the bytes count to float 175 func (v *Value) bytesFloat() float64 { 176 return float64(v.Bytes) 177 } 178 179 // NewValue returns a new empty instance of the structure representing the BPF 180 // map value 181 func (k *Key) NewValue() bpf.MapValue { return &Value{} } 182 183 // GetValuePtr returns the unsafe pointer to the BPF value. 184 func (v *Value) GetValuePtr() unsafe.Pointer { 185 return unsafe.Pointer(v) 186 } 187 188 func updateMetric(getCounter func() (prometheus.Counter, error), newValue float64) { 189 counter, err := getCounter() 190 if err != nil { 191 log.WithError(err).Warn("Failed to update prometheus metrics") 192 return 193 } 194 195 oldValue := metrics.GetCounterValue(counter) 196 if newValue > oldValue { 197 counter.Add((newValue - oldValue)) 198 } 199 } 200 201 // updatePrometheusMetrics checks the metricsmap key value pair 202 // and determines which prometheus metrics along with respective labels 203 // need to be updated. 204 func updatePrometheusMetrics(key *Key, val *Value) { 205 updateMetric(func() (prometheus.Counter, error) { 206 if key.IsDrop() { 207 return metrics.DropCount.GetMetricWithLabelValues(key.DropForwardReason(), key.Direction()) 208 } 209 return metrics.ForwardCount.GetMetricWithLabelValues(key.Direction()) 210 }, val.CountFloat()) 211 212 updateMetric(func() (prometheus.Counter, error) { 213 if key.IsDrop() { 214 return metrics.DropBytes.GetMetricWithLabelValues(key.DropForwardReason(), key.Direction()) 215 } 216 return metrics.ForwardBytes.GetMetricWithLabelValues(key.Direction()) 217 }, val.bytesFloat()) 218 } 219 220 // SyncMetricsMap is called periodically to sync off the metrics map by 221 // aggregating it into drops (by drop reason and direction) and 222 // forwards (by direction) with the prometheus server. 223 func SyncMetricsMap(ctx context.Context) error { 224 entry := make([]Value, possibleCpus) 225 file := bpf.MapPath(MapName) 226 227 var err error 228 metricsMap := bpf.GetMap(file) 229 if metricsMap == nil { 230 // Open the map and leave it open, since SyncMetricsMap is called 231 // periodically and it makes sense to use an already opened map rather 232 // than opening the map again and again. 233 // This also prevents the constant registration and unregistration of the 234 // Map. 235 metricsMap, err = bpf.OpenMap(file) 236 237 if err != nil { 238 return fmt.Errorf("Unable to open metrics map: %s", err) 239 } 240 } 241 242 var key, nextKey Key 243 for { 244 err := bpf.GetNextKey(metricsMap.GetFd(), unsafe.Pointer(&key), unsafe.Pointer(&nextKey)) 245 if err != nil { 246 break 247 } 248 err = bpf.LookupElement(metricsMap.GetFd(), unsafe.Pointer(&nextKey), unsafe.Pointer(&entry[0])) 249 if err != nil { 250 return fmt.Errorf("unable to lookup metrics map: %s", err) 251 } 252 253 // cannot use `range entry` since, if the first value for a particular 254 // CPU is zero, it never iterates over the next non-zero value. 255 for i := 0; i < possibleCpus; i++ { 256 // Increment Prometheus metrics here. 257 updatePrometheusMetrics(&nextKey, &entry[i]) 258 } 259 key = nextKey 260 261 } 262 return nil 263 } 264 265 // getNumPossibleCPUs returns a total number of possible CPUS, i.e. CPUs that 266 // have been allocated resources and can be brought online if they are present. 267 // The number is retrieved by parsing /sys/device/system/cpu/possible. 268 // 269 // See https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/linux/cpumask.h?h=v4.19#n50 270 // for more details. 271 func getNumPossibleCPUs() int { 272 f, err := os.Open(possibleCPUSysfsPath) 273 if err != nil { 274 log.WithError(err).Errorf("unable to open %q", possibleCPUSysfsPath) 275 } 276 defer f.Close() 277 278 return getNumPossibleCPUsFromReader(f) 279 } 280 281 func getNumPossibleCPUsFromReader(r io.Reader) int { 282 out, err := ioutil.ReadAll(r) 283 if err != nil { 284 log.WithError(err).Errorf("unable to read %q to get CPU count", possibleCPUSysfsPath) 285 return 0 286 } 287 288 var start, end int 289 count := 0 290 for _, s := range strings.Split(string(out), ",") { 291 // Go's scanf will return an error if a format cannot be fully matched. 292 // So, just ignore it, as a partial match (e.g. when there is only one 293 // CPU) is expected. 294 n, err := fmt.Sscanf(s, "%d-%d", &start, &end) 295 296 switch n { 297 case 0: 298 log.WithError(err).Errorf("failed to scan %q to retrieve number of possible CPUs!", s) 299 return 0 300 case 1: 301 count++ 302 default: 303 count += (end - start + 1) 304 } 305 } 306 307 return count 308 } 309 310 func init() { 311 possibleCpus = getNumPossibleCPUs() 312 313 vs := make(Values, possibleCpus) 314 315 // Metrics is a mapping of all packet drops and forwards associated with 316 // the node on ingress/egress direction 317 Metrics = bpf.NewPerCPUHashMap( 318 MapName, 319 &Key{}, 320 int(unsafe.Sizeof(Key{})), 321 &vs, 322 int(unsafe.Sizeof(Value{})), 323 possibleCpus, 324 MaxEntries, 325 0, 0, 326 bpf.ConvertKeyValue, 327 ) 328 }