github.com/looshlee/beatles@v0.0.0-20220727174639-742810ab631c/pkg/maps/metricsmap/metricsmap.go (about)

     1  // Copyright 2016-2019 Authors of Cilium
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package metricsmap
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"io"
    21  	"io/ioutil"
    22  	"os"
    23  	"strconv"
    24  	"strings"
    25  	"unsafe"
    26  
    27  	"github.com/cilium/cilium/pkg/bpf"
    28  	"github.com/cilium/cilium/pkg/logging"
    29  	"github.com/cilium/cilium/pkg/logging/logfields"
    30  	"github.com/cilium/cilium/pkg/metrics"
    31  	monitorAPI "github.com/cilium/cilium/pkg/monitor/api"
    32  
    33  	"github.com/prometheus/client_golang/prometheus"
    34  )
    35  
// Package-level state. Metrics and possibleCpus are both assigned in init();
// log is the package logger tagged with the "map-metrics" subsystem, and
// possibleCpus is the CPU count used to size the per-CPU value slices.
var (
	// Metrics is the bpf metrics map
	Metrics      *bpf.Map
	log          = logging.DefaultLogger.WithField(logfields.LogSubsys, "map-metrics")
	possibleCpus int
)
    42  
const (
	// MapName for metrics map.
	MapName = "cilium_metrics"
	// MaxEntries is the maximum number of keys that can be present in the
	// Metrics Map.
	MaxEntries = 65536
	// dirIngress and dirEgress values should match with
	// METRIC_INGRESS and METRIC_EGRESS in bpf/lib/common.h.
	// dirUnknown is the fallback that MetricDirection uses for any other
	// direction value.
	dirIngress = 1
	dirEgress  = 2
	dirUnknown = 0

	// possibleCPUSysfsPath is the sysfs file that getNumPossibleCPUs opens
	// to determine the number of possible CPUs.
	possibleCPUSysfsPath = "/sys/devices/system/cpu/possible"
)
    57  
    58  // direction is the metrics direction i.e ingress (to an endpoint)
    59  // or egress (from an endpoint). If it's none of the above, we return
    60  // UNKNOWN direction.
    61  var direction = map[uint8]string{
    62  	0: "UNKNOWN",
    63  	1: "INGRESS",
    64  	2: "EGRESS",
    65  }
    66  
    67  type pad3uint16 [3]uint16
    68  
    69  // DeepCopyInto is a deepcopy function, copying the receiver, writing into out. in must be non-nil.
    70  func (in *pad3uint16) DeepCopyInto(out *pad3uint16) {
    71  	copy(out[:], in[:])
    72  	return
    73  }
    74  
// Key must be in sync with struct metrics_key in <bpf/lib/common.h>
//
// Reason is the drop/forward reason code (see IsDrop and DropForwardReason),
// Dir is the traffic direction (see MetricDirection), and Reserved occupies
// the remaining six bytes so that the struct is eight bytes in total
// (presumably padding that mirrors the C definition; confirm against
// common.h).
// +k8s:deepcopy-gen=true
// +k8s:deepcopy-gen:interfaces=github.com/cilium/cilium/pkg/bpf.MapKey
type Key struct {
	Reason   uint8      `align:"reason"`
	Dir      uint8      `align:"dir"`
	Reserved pad3uint16 `align:"reserved"`
}
    83  
// Value must be in sync with struct metrics_value in <bpf/lib/common.h>
//
// Count is the drop/forward counter and Bytes is the matching byte counter
// for one key.
// +k8s:deepcopy-gen=true
// +k8s:deepcopy-gen:interfaces=github.com/cilium/cilium/pkg/bpf.MapValue
type Value struct {
	Count uint64 `align:"count"`
	Bytes uint64 `align:"bytes"`
}
    91  
// +k8s:deepcopy-gen=true
// +k8s:deepcopy-gen:interfaces=github.com/cilium/cilium/pkg/bpf.MapValue
// Values is a slice of Value. init() sizes it to one element per possible
// CPU and passes it to bpf.NewPerCPUHashMap as the value of the metrics map.
type Values []Value
    96  
    97  // DeepCopyMapValue is an autogenerated deepcopy function, copying the receiver, creating a new bpf.MapValue.
    98  func (vs *Values) DeepCopyMapValue() bpf.MapValue {
    99  	if c := vs.DeepCopy(); c != nil {
   100  		return &c
   101  	}
   102  	return nil
   103  }
   104  
   105  // String converts the value into a human readable string format
   106  func (vs Values) String() string {
   107  	sumCount, sumBytes := uint64(0), uint64(0)
   108  	for _, v := range vs {
   109  		sumCount += v.Count
   110  		sumBytes += v.Bytes
   111  	}
   112  	return fmt.Sprintf("count:%d bytes:%d", sumCount, sumBytes)
   113  }
   114  
// GetValuePtr returns the unsafe pointer to the BPF value.
//
// NOTE(review): vs is a pointer to a slice, so the returned pointer addresses
// the slice header rather than the first Value element. Confirm that the bpf
// package never hands this pointer to a syscall that expects the raw per-CPU
// value buffer.
func (vs *Values) GetValuePtr() unsafe.Pointer {
	return unsafe.Pointer(vs)
}
   119  
   120  // String converts the key into a human readable string format
   121  func (k *Key) String() string {
   122  	return fmt.Sprintf("reason:%d dir:%d", k.Reason, k.Dir)
   123  }
   124  
   125  // MetricDirection gets the direction in human readable string format
   126  func MetricDirection(dir uint8) string {
   127  	switch dir {
   128  	case dirIngress:
   129  		return direction[dir]
   130  	case dirEgress:
   131  		return direction[dir]
   132  	}
   133  	return direction[dirUnknown]
   134  }
   135  
   136  // Direction gets the direction in human readable string format
   137  func (k *Key) Direction() string {
   138  	return MetricDirection(k.Dir)
   139  }
   140  
   141  // DropForwardReason gets the forwarded/dropped reason in human readable string format
   142  func (k *Key) DropForwardReason() string {
   143  	return monitorAPI.DropReason(k.Reason)
   144  }
   145  
   146  // GetKeyPtr returns the unsafe pointer to the BPF key
   147  func (k *Key) GetKeyPtr() unsafe.Pointer { return unsafe.Pointer(k) }
   148  
   149  // String converts the value into a human readable string format
   150  func (v *Value) String() string {
   151  	return fmt.Sprintf("count:%d bytes:%d", v.Count, v.Bytes)
   152  }
   153  
   154  // RequestCount returns the drop/forward count in a human readable string format
   155  func (v *Value) RequestCount() string {
   156  	return strconv.FormatUint(v.Count, 10)
   157  }
   158  
   159  // RequestBytes returns drop/forward bytes in a human readable string format
   160  func (v *Value) RequestBytes() string {
   161  	return strconv.FormatUint(v.Bytes, 10)
   162  }
   163  
   164  // IsDrop checks if the reason is drop or not.
   165  func (k *Key) IsDrop() bool {
   166  	return k.Reason == monitorAPI.DropInvalid || k.Reason >= monitorAPI.DropMin
   167  }
   168  
   169  // CountFloat converts the request count to float
   170  func (v *Value) CountFloat() float64 {
   171  	return float64(v.Count)
   172  }
   173  
   174  // bytesFloat converts the bytes count to float
   175  func (v *Value) bytesFloat() float64 {
   176  	return float64(v.Bytes)
   177  }
   178  
   179  // NewValue returns a new empty instance of the structure representing the BPF
   180  // map value
   181  func (k *Key) NewValue() bpf.MapValue { return &Value{} }
   182  
   183  // GetValuePtr returns the unsafe pointer to the BPF value.
   184  func (v *Value) GetValuePtr() unsafe.Pointer {
   185  	return unsafe.Pointer(v)
   186  }
   187  
   188  func updateMetric(getCounter func() (prometheus.Counter, error), newValue float64) {
   189  	counter, err := getCounter()
   190  	if err != nil {
   191  		log.WithError(err).Warn("Failed to update prometheus metrics")
   192  		return
   193  	}
   194  
   195  	oldValue := metrics.GetCounterValue(counter)
   196  	if newValue > oldValue {
   197  		counter.Add((newValue - oldValue))
   198  	}
   199  }
   200  
   201  // updatePrometheusMetrics checks the metricsmap key value pair
   202  // and determines which prometheus metrics along with respective labels
   203  // need to be updated.
   204  func updatePrometheusMetrics(key *Key, val *Value) {
   205  	updateMetric(func() (prometheus.Counter, error) {
   206  		if key.IsDrop() {
   207  			return metrics.DropCount.GetMetricWithLabelValues(key.DropForwardReason(), key.Direction())
   208  		}
   209  		return metrics.ForwardCount.GetMetricWithLabelValues(key.Direction())
   210  	}, val.CountFloat())
   211  
   212  	updateMetric(func() (prometheus.Counter, error) {
   213  		if key.IsDrop() {
   214  			return metrics.DropBytes.GetMetricWithLabelValues(key.DropForwardReason(), key.Direction())
   215  		}
   216  		return metrics.ForwardBytes.GetMetricWithLabelValues(key.Direction())
   217  	}, val.bytesFloat())
   218  }
   219  
   220  // SyncMetricsMap is called periodically to sync off the metrics map by
   221  // aggregating it into drops (by drop reason and direction) and
   222  // forwards (by direction) with the prometheus server.
   223  func SyncMetricsMap(ctx context.Context) error {
   224  	entry := make([]Value, possibleCpus)
   225  	file := bpf.MapPath(MapName)
   226  
   227  	var err error
   228  	metricsMap := bpf.GetMap(file)
   229  	if metricsMap == nil {
   230  		// Open the map and leave it open, since SyncMetricsMap is called
   231  		// periodically and it makes sense to use an already opened map rather
   232  		// than opening the map again and again.
   233  		// This also prevents the constant registration and unregistration of the
   234  		// Map.
   235  		metricsMap, err = bpf.OpenMap(file)
   236  
   237  		if err != nil {
   238  			return fmt.Errorf("Unable to open metrics map: %s", err)
   239  		}
   240  	}
   241  
   242  	var key, nextKey Key
   243  	for {
   244  		err := bpf.GetNextKey(metricsMap.GetFd(), unsafe.Pointer(&key), unsafe.Pointer(&nextKey))
   245  		if err != nil {
   246  			break
   247  		}
   248  		err = bpf.LookupElement(metricsMap.GetFd(), unsafe.Pointer(&nextKey), unsafe.Pointer(&entry[0]))
   249  		if err != nil {
   250  			return fmt.Errorf("unable to lookup metrics map: %s", err)
   251  		}
   252  
   253  		// cannot use `range entry` since, if the first value for a particular
   254  		// CPU is zero, it never iterates over the next non-zero value.
   255  		for i := 0; i < possibleCpus; i++ {
   256  			// Increment Prometheus metrics here.
   257  			updatePrometheusMetrics(&nextKey, &entry[i])
   258  		}
   259  		key = nextKey
   260  
   261  	}
   262  	return nil
   263  }
   264  
   265  // getNumPossibleCPUs returns a total number of possible CPUS, i.e. CPUs that
   266  // have been allocated resources and can be brought online if they are present.
   267  // The number is retrieved by parsing /sys/device/system/cpu/possible.
   268  //
   269  // See https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/linux/cpumask.h?h=v4.19#n50
   270  // for more details.
   271  func getNumPossibleCPUs() int {
   272  	f, err := os.Open(possibleCPUSysfsPath)
   273  	if err != nil {
   274  		log.WithError(err).Errorf("unable to open %q", possibleCPUSysfsPath)
   275  	}
   276  	defer f.Close()
   277  
   278  	return getNumPossibleCPUsFromReader(f)
   279  }
   280  
   281  func getNumPossibleCPUsFromReader(r io.Reader) int {
   282  	out, err := ioutil.ReadAll(r)
   283  	if err != nil {
   284  		log.WithError(err).Errorf("unable to read %q to get CPU count", possibleCPUSysfsPath)
   285  		return 0
   286  	}
   287  
   288  	var start, end int
   289  	count := 0
   290  	for _, s := range strings.Split(string(out), ",") {
   291  		// Go's scanf will return an error if a format cannot be fully matched.
   292  		// So, just ignore it, as a partial match (e.g. when there is only one
   293  		// CPU) is expected.
   294  		n, err := fmt.Sscanf(s, "%d-%d", &start, &end)
   295  
   296  		switch n {
   297  		case 0:
   298  			log.WithError(err).Errorf("failed to scan %q to retrieve number of possible CPUs!", s)
   299  			return 0
   300  		case 1:
   301  			count++
   302  		default:
   303  			count += (end - start + 1)
   304  		}
   305  	}
   306  
   307  	return count
   308  }
   309  
   310  func init() {
   311  	possibleCpus = getNumPossibleCPUs()
   312  
   313  	vs := make(Values, possibleCpus)
   314  
   315  	// Metrics is a mapping of all packet drops and forwards associated with
   316  	// the node on ingress/egress direction
   317  	Metrics = bpf.NewPerCPUHashMap(
   318  		MapName,
   319  		&Key{},
   320  		int(unsafe.Sizeof(Key{})),
   321  		&vs,
   322  		int(unsafe.Sizeof(Value{})),
   323  		possibleCpus,
   324  		MaxEntries,
   325  		0, 0,
   326  		bpf.ConvertKeyValue,
   327  	)
   328  }