github.com/cilium/cilium@v1.16.2/pkg/hubble/monitor/consumer.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // Copyright Authors of Cilium 3 4 package monitor 5 6 import ( 7 "strings" 8 9 "github.com/google/uuid" 10 "github.com/sirupsen/logrus" 11 12 flowpb "github.com/cilium/cilium/api/v1/flow" 13 "github.com/cilium/cilium/pkg/hubble/metrics" 14 observerTypes "github.com/cilium/cilium/pkg/hubble/observer/types" 15 "github.com/cilium/cilium/pkg/lock" 16 "github.com/cilium/cilium/pkg/logging" 17 monitorConsumer "github.com/cilium/cilium/pkg/monitor/agent/consumer" 18 nodeTypes "github.com/cilium/cilium/pkg/node/types" 19 "github.com/cilium/cilium/pkg/time" 20 ) 21 22 // Observer is the receiver of MonitorEvents 23 type Observer interface { 24 GetEventsChannel() chan *observerTypes.MonitorEvent 25 GetLogger() logrus.FieldLogger 26 } 27 28 // consumer implements monitorConsumer.MonitorConsumer 29 type consumer struct { 30 observer Observer 31 numEventsLost uint64 32 lostLock lock.Mutex 33 logLimiter logging.Limiter 34 } 35 36 // NewConsumer returns an initialized pointer to consumer. 37 func NewConsumer(observer Observer) monitorConsumer.MonitorConsumer { 38 mc := &consumer{ 39 observer: observer, 40 numEventsLost: 0, 41 logLimiter: logging.NewLimiter(30*time.Second, 1), 42 } 43 return mc 44 } 45 46 // sendEventQueueLostEvents tries to send the current value of the lost events 47 // counter to the observer. If it succeeds to enqueue a notification, it 48 // resets the counter. 49 func (c *consumer) sendNumLostEvents() { 50 c.lostLock.Lock() 51 defer c.lostLock.Unlock() 52 // check again, in case multiple 53 // routines contended the lock 54 if c.numEventsLost == 0 { 55 return 56 } 57 58 numEventsLostNotification := &observerTypes.MonitorEvent{ 59 UUID: uuid.New(), 60 Timestamp: time.Now(), 61 NodeName: nodeTypes.GetAbsoluteNodeName(), 62 Payload: &observerTypes.LostEvent{ 63 Source: observerTypes.LostEventSourceEventsQueue, 64 NumLostEvents: c.numEventsLost, 65 }, 66 } 67 select { 68 case c.observer.GetEventsChannel() <- numEventsLostNotification: 69 // We now now safely reset the counter, as at this point have 70 // successfully notified the observer about the amount of events 71 // that were lost since the previous LostEvent message 72 c.numEventsLost = 0 73 default: 74 // We do not need to bump the numEventsLost counter here, as we will 75 // try to send a new LostEvent notification again during the next 76 // invocation of sendEvent 77 } 78 } 79 80 // sendEvent enqueues an event in the observer. If this is not possible, it 81 // keeps a counter of lost events, which it will regularly try to send to the 82 // observer as well 83 func (c *consumer) sendEvent(event *observerTypes.MonitorEvent) { 84 if c.numEventsLost > 0 { 85 c.sendNumLostEvents() 86 } 87 select { 88 case c.observer.GetEventsChannel() <- event: 89 default: 90 c.countDroppedEvent() 91 } 92 } 93 94 // countDroppedEvent logs that the events channel is full 95 // and counts how many messages it has lost. 96 func (c *consumer) countDroppedEvent() { 97 c.lostLock.Lock() 98 defer c.lostLock.Unlock() 99 if c.numEventsLost == 0 && c.logLimiter.Allow() { 100 c.observer.GetLogger().WithField("related-metric", "hubble_lost_events_total"). 101 Warning("hubble events queue is full: dropping messages; consider increasing the queue size (hubble-event-queue-size) or provisioning more CPU") 102 } 103 c.numEventsLost++ 104 metrics.LostEvents.WithLabelValues(strings.ToLower(flowpb.LostEventSource_OBSERVER_EVENTS_QUEUE.String())).Inc() 105 } 106 107 // NotifyAgentEvent implements monitorConsumer.MonitorConsumer 108 func (c *consumer) NotifyAgentEvent(typ int, message interface{}) { 109 c.sendEvent(&observerTypes.MonitorEvent{ 110 UUID: uuid.New(), 111 Timestamp: time.Now(), 112 NodeName: nodeTypes.GetAbsoluteNodeName(), 113 Payload: &observerTypes.AgentEvent{ 114 Type: typ, 115 Message: message, 116 }, 117 }) 118 } 119 120 // NotifyPerfEvent implements monitorConsumer.MonitorConsumer 121 func (c *consumer) NotifyPerfEvent(data []byte, cpu int) { 122 c.sendEvent(&observerTypes.MonitorEvent{ 123 UUID: uuid.New(), 124 Timestamp: time.Now(), 125 NodeName: nodeTypes.GetAbsoluteNodeName(), 126 Payload: &observerTypes.PerfEvent{ 127 Data: data, 128 CPU: cpu, 129 }, 130 }) 131 } 132 133 // NotifyPerfEventLost implements monitorConsumer.MonitorConsumer 134 func (c *consumer) NotifyPerfEventLost(numLostEvents uint64, cpu int) { 135 c.sendEvent(&observerTypes.MonitorEvent{ 136 UUID: uuid.New(), 137 Timestamp: time.Now(), 138 NodeName: nodeTypes.GetAbsoluteNodeName(), 139 Payload: &observerTypes.LostEvent{ 140 Source: observerTypes.LostEventSourcePerfRingBuffer, 141 NumLostEvents: numLostEvents, 142 CPU: cpu, 143 }, 144 }) 145 metrics.LostEvents.WithLabelValues(strings.ToLower(flowpb.LostEventSource_PERF_EVENT_RING_BUFFER.String())).Add(float64(numLostEvents)) 146 }