github.com/cilium/cilium@v1.16.2/pkg/hubble/monitor/consumer.go

// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium

package monitor

import (
	"strings"

	"github.com/google/uuid"
	"github.com/sirupsen/logrus"

	flowpb "github.com/cilium/cilium/api/v1/flow"
	"github.com/cilium/cilium/pkg/hubble/metrics"
	observerTypes "github.com/cilium/cilium/pkg/hubble/observer/types"
	"github.com/cilium/cilium/pkg/lock"
	"github.com/cilium/cilium/pkg/logging"
	monitorConsumer "github.com/cilium/cilium/pkg/monitor/agent/consumer"
	nodeTypes "github.com/cilium/cilium/pkg/node/types"
	"github.com/cilium/cilium/pkg/time"
)

// Observer is the receiver of MonitorEvents
type Observer interface {
	GetEventsChannel() chan *observerTypes.MonitorEvent
	GetLogger() logrus.FieldLogger
}
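// For illustration only, a minimal Observer might look like the hypothetical
// sketch below; the production implementation lives in Hubble's observer
// package and owns the buffered events channel:
//
//	type bufferedObserver struct {
//		events chan *observerTypes.MonitorEvent
//		log    logrus.FieldLogger
//	}
//
//	func (o *bufferedObserver) GetEventsChannel() chan *observerTypes.MonitorEvent { return o.events }
//	func (o *bufferedObserver) GetLogger() logrus.FieldLogger                      { return o.log }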

// consumer implements monitorConsumer.MonitorConsumer
type consumer struct {
	// observer receives the converted monitor events.
	observer Observer
	// numEventsLost counts events dropped because the observer's channel was
	// full; it is protected by lostLock and reset once a LostEvent
	// notification has been delivered.
	numEventsLost uint64
	lostLock      lock.Mutex
	// logLimiter rate-limits the warning logged when events are dropped.
	logLimiter logging.Limiter
}

// NewConsumer returns an initialized pointer to consumer.
func NewConsumer(observer Observer) monitorConsumer.MonitorConsumer {
	mc := &consumer{
		observer:      observer,
		numEventsLost: 0,
		logLimiter:    logging.NewLimiter(30*time.Second, 1),
	}
	return mc
}
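// Typical wiring, as a sketch only: the variable names below are
// hypothetical, and it assumes the monitor agent's RegisterNewConsumer
// method as the registration point.
//
//	c := monitor.NewConsumer(hubbleObserver)
//	monitorAgent.RegisterNewConsumer(c)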

// sendNumLostEvents tries to send the current value of the lost events
// counter to the observer. If it succeeds in enqueueing a notification, it
// resets the counter.
func (c *consumer) sendNumLostEvents() {
	c.lostLock.Lock()
	defer c.lostLock.Unlock()
	// Check again in case another goroutine already sent the notification
	// and reset the counter while we were contending for the lock.
	if c.numEventsLost == 0 {
		return
	}

	numEventsLostNotification := &observerTypes.MonitorEvent{
		UUID:      uuid.New(),
		Timestamp: time.Now(),
		NodeName:  nodeTypes.GetAbsoluteNodeName(),
		Payload: &observerTypes.LostEvent{
			Source:        observerTypes.LostEventSourceEventsQueue,
			NumLostEvents: c.numEventsLost,
		},
	}
	select {
	case c.observer.GetEventsChannel() <- numEventsLostNotification:
		// We can now safely reset the counter, as at this point we have
		// successfully notified the observer about the number of events
		// that were lost since the previous LostEvent message.
		c.numEventsLost = 0
	default:
		// We do not need to bump the numEventsLost counter here, as we will
		// try to send a new LostEvent notification again during the next
		// invocation of sendEvent.
	}
}

// sendEvent enqueues an event in the observer. If this is not possible, it
// keeps a counter of lost events, which it will regularly try to send to the
// observer as well.
func (c *consumer) sendEvent(event *observerTypes.MonitorEvent) {
	if c.numEventsLost > 0 {
		c.sendNumLostEvents()
	}
	select {
	case c.observer.GetEventsChannel() <- event:
	default:
		c.countDroppedEvent()
	}
}

// countDroppedEvent logs that the events channel is full
// and counts how many messages it has lost.
func (c *consumer) countDroppedEvent() {
	c.lostLock.Lock()
	defer c.lostLock.Unlock()
	if c.numEventsLost == 0 && c.logLimiter.Allow() {
		c.observer.GetLogger().WithField("related-metric", "hubble_lost_events_total").
			Warning("hubble events queue is full: dropping messages; consider increasing the queue size (hubble-event-queue-size) or provisioning more CPU")
	}
	c.numEventsLost++
	metrics.LostEvents.WithLabelValues(strings.ToLower(flowpb.LostEventSource_OBSERVER_EVENTS_QUEUE.String())).Inc()
}
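// The counter bumped above is exposed via pkg/hubble/metrics. Assuming its
// single label is named "source" (an assumption, not verified here), a
// Prometheus scrape would show roughly:
//
//	hubble_lost_events_total{source="observer_events_queue"} 42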

// NotifyAgentEvent implements monitorConsumer.MonitorConsumer
func (c *consumer) NotifyAgentEvent(typ int, message interface{}) {
	c.sendEvent(&observerTypes.MonitorEvent{
		UUID:      uuid.New(),
		Timestamp: time.Now(),
		NodeName:  nodeTypes.GetAbsoluteNodeName(),
		Payload: &observerTypes.AgentEvent{
			Type:    typ,
			Message: message,
		},
	})
}

// NotifyPerfEvent implements monitorConsumer.MonitorConsumer
func (c *consumer) NotifyPerfEvent(data []byte, cpu int) {
	c.sendEvent(&observerTypes.MonitorEvent{
		UUID:      uuid.New(),
		Timestamp: time.Now(),
		NodeName:  nodeTypes.GetAbsoluteNodeName(),
		Payload: &observerTypes.PerfEvent{
			Data: data,
			CPU:  cpu,
		},
	})
}

// NotifyPerfEventLost implements monitorConsumer.MonitorConsumer
func (c *consumer) NotifyPerfEventLost(numLostEvents uint64, cpu int) {
	c.sendEvent(&observerTypes.MonitorEvent{
		UUID:      uuid.New(),
		Timestamp: time.Now(),
		NodeName:  nodeTypes.GetAbsoluteNodeName(),
		Payload: &observerTypes.LostEvent{
			Source:        observerTypes.LostEventSourcePerfRingBuffer,
			NumLostEvents: numLostEvents,
			CPU:           cpu,
		},
	})
	metrics.LostEvents.WithLabelValues(strings.ToLower(flowpb.LostEventSource_PERF_EVENT_RING_BUFFER.String())).Add(float64(numLostEvents))
}