github.com/cilium/cilium@v1.16.2/pkg/signal/signal.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package signal
     5  
     6  import (
     7  	"bytes"
     8  	"encoding/binary"
     9  	"errors"
    10  	"fmt"
    11  	"io"
    12  	"os"
    13  	"sync/atomic"
    14  
    15  	"github.com/cilium/ebpf/perf"
    16  	"github.com/sirupsen/logrus"
    17  
    18  	"github.com/cilium/cilium/pkg/byteorder"
    19  	"github.com/cilium/cilium/pkg/lock"
    20  	"github.com/cilium/cilium/pkg/logging"
    21  	"github.com/cilium/cilium/pkg/logging/logfields"
    22  	"github.com/cilium/cilium/pkg/maps/signalmap"
    23  	"github.com/cilium/cilium/pkg/metrics"
    24  )
    25  
// log is the package logger; every entry is tagged with the "signal" subsystem field.
var log = logging.DefaultLogger.WithField(logfields.LogSubsys, "signal")
    27  
// SignalType identifies the kind of signal sent by the BPF datapath. Its value
// is decoded from the start of each perf sample, indexes the handler table and
// selects the signal's bit in the active-signal mask.
type SignalType uint32

const (
	// SignalNatFillUp denotes potential congestion on the NAT table
	SignalNatFillUp SignalType = iota
	// SignalCTFillUp denotes potential congestion on the CT table
	SignalCTFillUp
	// SignalAuthRequired denotes a connection dropped due to missing authentication
	SignalAuthRequired
	// SignalTypeMax is the number of defined signal types. It is an exclusive
	// upper bound: values greater than or equal to it are rejected as invalid.
	SignalTypeMax
)
    39  
// signalName maps every signal type to the name used for it as a metric label
// and in log fields.
var signalName = [SignalTypeMax]string{
	SignalNatFillUp:    "nat_fill_up",
	SignalCTFillUp:     "ct_fill_up",
	SignalAuthRequired: "auth_required",
}
    45  
// SignalHandler parses signal data from the perf message via a reader.
// The reader is positioned just past the signal type, which has already been
// consumed by the dispatcher.
// Signal handler functions are only ever called from a single goroutine.
// A nil reader is passed when the handler is closed.
// Returns low-cardinality representation of the signal data to be used in a metric.
// An error matching ErrFullChannel is counted as "channel overflow"; any other
// error is logged and counted as "parse error".
type SignalHandler func(io.Reader) (metricData string, err error)
    51  
// Sentinel errors used by the signal manager and its handlers:
//   - ErrFullChannel: a ChannelHandler could not deliver the decoded data
//     because the non-blocking send on its channel would have blocked.
//   - ErrNilChannel: a ChannelHandler was created with a nil channel.
//   - ErrRuntimeRegistration: RegisterHandler was called after the perf
//     reader had already been opened.
//   - ErrNoHandlers: start was called while no signal was active.
var (
	ErrFullChannel         = errors.New("full channel")
	ErrNilChannel          = errors.New("nil channel")
	ErrRuntimeRegistration = errors.New("runtime registration not supported")
	ErrNoHandlers          = errors.New("no registered signal handlers")
)
    58  
// signalSet is a bit mask of signals that have active handlers.
// Bit i corresponds to the SignalType with numeric value i.
// Zero when all handlers are muted.
type signalSet uint64
    62  
// SignalManager is the interface through which users of datapath signals
// register their handlers and temporarily mute or unmute individual signals.
type SignalManager interface {
	// RegisterHandler must be called during initialization of the cells using signals.
	// It returns an error if any of the signals is out of range or already has
	// a handler.
	RegisterHandler(handler SignalHandler, signals ...SignalType) error

	// MuteSignals stops delivery of the given signals to their handlers.
	// It returns an error if any of the signals is out of range.
	MuteSignals(signals ...SignalType) error
	// UnmuteSignals re-enables delivery of the given signals to their handlers.
	// It returns an error if any of the signals is out of range.
	UnmuteSignals(signals ...SignalType) error
}
    70  
// signalManager is the SignalManager implementation backed by a perf reader
// on the signal map.
//
//   - signalmap: the map from which the perf reader is created in start.
//   - handlers: one handler slot per signal type; a nil slot means the signal
//     has no registered handler.
//   - events: the perf reader; nil until start has opened it.
//   - done: closed by the listener goroutine once it has finished shutting down.
type signalManager struct {
	signalmap signalmap.Map
	handlers  [SignalTypeMax]SignalHandler
	events    signalmap.PerfReader
	done      chan struct{}

	// mutex is needed to sync mute/unmute with events Pause/Resume
	// Atomic Uint64 is used to allow reading active signal bits without
	// taking the mutex
	mutex         lock.Mutex
	activeSignals atomic.Uint64
}
    83  
    84  func newSignalManager(signalMap signalmap.Map) *signalManager {
    85  	return &signalManager{
    86  		signalmap: signalMap,
    87  		done:      make(chan struct{}),
    88  	}
    89  }
    90  
    91  func (sm *signalManager) isSignalMuted(signal SignalType) bool {
    92  	signals := signalSet(sm.activeSignals.Load())
    93  	return signals&(signalSet(1)<<signal) == 0
    94  }
    95  
    96  func (sm *signalManager) isMuted() bool {
    97  	return sm.activeSignals.Load() == 0
    98  }
    99  
   100  func (sm *signalManager) setMuted(signals signalSet) {
   101  	sm.mutex.Lock()
   102  	defer sm.mutex.Unlock()
   103  
   104  	old := sm.activeSignals.Load()
   105  	new := old &^ uint64(signals)
   106  	sm.activeSignals.Store(new)
   107  
   108  	if old != 0 && new == 0 && sm.events != nil {
   109  		// If all signals are muted, then we can turn off perf
   110  		// RB notifications from kernel side, which is much more efficient as
   111  		// no new message is pushed into the RB.
   112  		sm.events.Pause()
   113  	}
   114  }
   115  
   116  func (sm *signalManager) setUnmuted(signals signalSet) {
   117  	sm.mutex.Lock()
   118  	defer sm.mutex.Unlock()
   119  
   120  	old := sm.activeSignals.Load()
   121  	new := old | uint64(signals)
   122  	sm.activeSignals.Store(new)
   123  
   124  	if old == 0 && new != 0 && sm.events != nil {
   125  		// If any of the signals are unmuted, then we must turn on perf
   126  		// RB notifications from kernel side.
   127  		sm.events.Resume()
   128  	}
   129  }
   130  
   131  func signalCollectMetrics(signalType, signalData, signalStatus string) {
   132  	metrics.SignalsHandled.WithLabelValues(signalType, signalData, signalStatus).Inc()
   133  }
   134  
   135  func (sm *signalManager) signalReceive(msg *perf.Record) {
   136  	var which SignalType
   137  	reader := bytes.NewReader(msg.RawSample)
   138  	if err := binary.Read(reader, byteorder.Native, &which); err != nil {
   139  		log.WithError(err).Warning("cannot parse signal type from BPF datapath")
   140  		return
   141  	}
   142  
   143  	if which >= SignalTypeMax {
   144  		log.WithField(logfields.Signal, which).Warning("invalid signal type")
   145  		return
   146  	}
   147  
   148  	name := signalName[which]
   149  	handler := sm.handlers[which]
   150  	if handler == nil {
   151  		signalCollectMetrics(name, "", "unregistered")
   152  		return
   153  	}
   154  	if sm.isSignalMuted(which) {
   155  		signalCollectMetrics(name, "", "muted")
   156  		return
   157  	}
   158  
   159  	status := "received"
   160  	metricData, err := handler(reader)
   161  	if err != nil {
   162  		if errors.Is(err, ErrFullChannel) {
   163  			status = "channel overflow"
   164  		} else {
   165  			log.WithError(err).WithField(logfields.Signal, name).Warning("cannot parse signal data from BPF datapath")
   166  			status = "parse error"
   167  		}
   168  	}
   169  	signalCollectMetrics(name, metricData, status)
   170  }
   171  
   172  // MuteSignals tells to not send any new events for the given signals.
   173  func (sm *signalManager) MuteSignals(signals ...SignalType) error {
   174  	var set signalSet
   175  	for _, signal := range signals {
   176  		if signal >= SignalTypeMax {
   177  			return fmt.Errorf("signal number not supported: %d", signal)
   178  		}
   179  		set |= signalSet(1) << signal
   180  	}
   181  	sm.setMuted(set)
   182  	return nil
   183  }
   184  
   185  // UnmuteSignals tells to allow sending new events to the given signals.
   186  func (sm *signalManager) UnmuteSignals(signals ...SignalType) error {
   187  	var set signalSet
   188  	for _, signal := range signals {
   189  		if signal >= SignalTypeMax {
   190  			return fmt.Errorf("signal number not supported: %d", signal)
   191  		}
   192  		set |= signalSet(1) << signal
   193  	}
   194  	sm.setUnmuted(set)
   195  	return nil
   196  }
   197  
   198  // ChannelHandler is a generic function returning a SignalHandler that writes
   199  // data from a reader to the channel.
   200  func ChannelHandler[T fmt.Stringer](ch chan<- T) SignalHandler {
   201  	closed := false
   202  	return func(reader io.Reader) (string, error) {
   203  		if ch == nil {
   204  			return "", ErrNilChannel
   205  		}
   206  		if reader == nil {
   207  			if !closed {
   208  				closed = true
   209  				close(ch)
   210  			}
   211  			return "", io.EOF
   212  		}
   213  		var data T
   214  		if err := binary.Read(reader, byteorder.Native, &data); err != nil {
   215  			return "", err
   216  		}
   217  		select {
   218  		case ch <- data:
   219  		default:
   220  			return "", ErrFullChannel
   221  		}
   222  		return data.String(), nil
   223  	}
   224  }
   225  
   226  // RegisterHandler registers a signal handler for the given signals.
   227  func (sm *signalManager) RegisterHandler(handler SignalHandler, signals ...SignalType) error {
   228  	if sm.events != nil {
   229  		return ErrRuntimeRegistration
   230  	}
   231  
   232  	for _, signal := range signals {
   233  		if signal >= SignalTypeMax {
   234  			return fmt.Errorf("signal number not supported: %d", signal)
   235  		}
   236  		if sm.handlers[signal] != nil {
   237  			return fmt.Errorf("channel for signal number already registered: %d", signal)
   238  		}
   239  	}
   240  
   241  	for _, signal := range signals {
   242  		sm.handlers[signal] = handler
   243  		sm.setUnmuted(signalSet(1) << signal)
   244  	}
   245  	return nil
   246  }
   247  
   248  // Start signal listener. Called after all the handlers have registered and signalmap.open() has
   249  // been called by hive.
   250  func (sm *signalManager) start() error {
   251  	var err error
   252  
   253  	// Start listening for signals only if there are registered handlers
   254  	if sm.isMuted() {
   255  		return ErrNoHandlers
   256  	}
   257  
   258  	sm.events, err = sm.signalmap.NewReader()
   259  	if err != nil {
   260  		return fmt.Errorf("cannot open %s map! Ignoring signals: %w", sm.signalmap.MapName(), err)
   261  	}
   262  
   263  	go func() {
   264  		log.Info("Datapath signal listener running")
   265  		for {
   266  			record, err := sm.events.Read()
   267  			if err != nil {
   268  				if errors.Is(err, os.ErrClosed) {
   269  					break
   270  				}
   271  				signalCollectMetrics("", "", "error")
   272  				log.WithError(err).WithFields(logrus.Fields{
   273  					logfields.BPFMapName: signalmap.MapName,
   274  				}).Error("failed to read event")
   275  				continue
   276  			}
   277  
   278  			if record.LostSamples > 0 {
   279  				signalCollectMetrics("", "", "lost")
   280  				continue
   281  			}
   282  			sm.signalReceive(&record)
   283  		}
   284  		log.Info("Datapath signal listener exiting")
   285  
   286  		// Close registered signal channels
   287  		for i, handler := range sm.handlers {
   288  			if handler != nil {
   289  				handler(nil)         // let the handler close it's channel
   290  				sm.handlers[i] = nil // let handler be GC'd
   291  			}
   292  		}
   293  		close(sm.done)
   294  		log.Info("Datapath signal listener done")
   295  	}()
   296  
   297  	return nil
   298  }
   299  
   300  // stop closes all signal channels
   301  func (sm *signalManager) stop() error {
   302  	err := sm.events.Close()
   303  	if err == nil {
   304  		<-sm.done
   305  	}
   306  	return err
   307  }