github.com/cilium/cilium@v1.16.2/pkg/hubble/recorder/sink/dispatch.go

// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium

package sink

import (
	"context"
	"fmt"
	"runtime"

	"github.com/sirupsen/logrus"
	"golang.org/x/sys/unix"

	"github.com/cilium/cilium/pkg/byteorder"
	"github.com/cilium/cilium/pkg/hubble/recorder/pcap"
	"github.com/cilium/cilium/pkg/lock"
	"github.com/cilium/cilium/pkg/logging"
	"github.com/cilium/cilium/pkg/logging/logfields"
	"github.com/cilium/cilium/pkg/monitor"
	monitorAPI "github.com/cilium/cilium/pkg/monitor/api"
	"github.com/cilium/cilium/pkg/time"
)

var log = logging.DefaultLogger.WithField(logfields.LogSubsys, "recorder-sink")

// record is a captured packet which will be written to file in the pcap format
type record struct {
	timestamp time.Time
	ruleID    uint16
	inclLen   uint32
	origLen   uint32
	data      []byte
}

// Handle enables the owner to subscribe to sink statistics
type Handle struct {
	// StatsUpdated is a channel which receives a new empty message whenever
	// there was an update to the sink statistics.
	StatsUpdated <-chan struct{}
	// Done is a channel which is closed when this sink has been shut down.
	Done <-chan struct{}

	sink *sink
}

// Stats returns the latest statistics for this sink.
func (h *Handle) Stats() Statistics {
	return h.sink.copyStats()
}

// Stop requests the underlying sink to stop. Handle.Done will be closed
// once the sink has drained its queue and stopped.
func (h *Handle) Stop() {
	h.sink.stop()
}

// Err returns the last error on this sink once the sink has stopped.
func (h *Handle) Err() error {
	return h.sink.err()
}

// Statistics contains the statistics for a pcap sink
type Statistics struct {
	PacketsWritten uint64
	BytesWritten   uint64
	PacketsLost    uint64
	BytesLost      uint64
}

// StopConditions defines a set of values which cause the sink to stop
// recording if any of them are hit. Zero-valued conditions are ignored.
type StopConditions struct {
	PacketsCaptured uint64
	BytesCaptured   uint64
	DurationElapsed time.Duration
}

// PcapSink defines the parameters of a sink which writes to a pcap.RecordWriter
type PcapSink struct {
	RuleID        uint16
	Header        pcap.Header
	Writer        pcap.RecordWriter
	StopCondition StopConditions
}

// Dispatch implements consumer.MonitorConsumer and dispatches incoming
// recorder captures to registered sinks based on their rule ID.
type Dispatch struct {
	mutex lock.RWMutex

	bootTimeOffset int64

	sinkQueueSize int
	sinkByRuleID  map[uint16]*sink
}

// NewDispatch creates a new sink dispatcher. Each registered sink may have a
// queue of up to sinkQueueSize pending captures.
func NewDispatch(sinkQueueSize int) (*Dispatch, error) {
	if sinkQueueSize < 1 {
		return nil, fmt.Errorf("invalid sink queue size: %d", sinkQueueSize)
	}

	bootTimeOffset, err := estimateBootTimeOffset()
	if err != nil {
		return nil, fmt.Errorf("failed to obtain boot time clock: %w", err)
	}

	return &Dispatch{
		bootTimeOffset: bootTimeOffset,
		sinkQueueSize:  sinkQueueSize,
		sinkByRuleID:   map[uint16]*sink{},
	}, nil
}

// StartSink starts a new sink for the pcap sink configuration p. Any
// captures with a matching rule ID will be forwarded to the pcap sink p.Writer.
// The provided p.Header is written to the pcap sink during initialization.
// The sink is unregistered automatically when it stops. A sink is stopped for
// one of the following four reasons. In all cases, Handle.Done will be closed.
// - Explicitly via Handle.Stop (Handle.Err() == nil)
// - When one of the conditions in p.StopCondition is hit (Handle.Err() == nil)
// - When the context ctx is cancelled (Handle.Err() != nil)
// - When an error occurred (Handle.Err() != nil)
//
// An illustrative usage sketch can be found at the end of this file.
func (d *Dispatch) StartSink(ctx context.Context, p PcapSink) (*Handle, error) {
	d.mutex.Lock()
	defer d.mutex.Unlock()

	if _, ok := d.sinkByRuleID[p.RuleID]; ok {
		return nil, fmt.Errorf("sink for rule id %d already registered", p.RuleID)
	}

	s := startSink(ctx, p, d.sinkQueueSize)
	d.sinkByRuleID[p.RuleID] = s

	go func() {
		<-s.done
		d.mutex.Lock()
		delete(d.sinkByRuleID, p.RuleID)
		d.mutex.Unlock()
	}()

	return &Handle{
		StatsUpdated: s.trigger,
		Done:         s.done,
		sink:         s,
	}, nil
}

func (d *Dispatch) decodeRecordCaptureLocked(data []byte) (rec record, err error) {
	dataLen := uint32(len(data))
	if dataLen < monitor.RecorderCaptureLen {
		return record{}, fmt.Errorf("not enough data to decode capture message: %d", dataLen)
	}

	// This needs to stay in sync with struct capture_msg from
	// bpf/include/pcap.h.
	// We could use binary.Read on monitor.RecorderCapture, but since it
	// requires reflection, it is too slow to use on the critical path here.
	const (
		offsetRuleID         = 2
		offsetTimeBoot       = 8
		offsetCaptureLength  = 16
		offsetOriginalLength = 20
	)
	n := byteorder.Native
	ruleID := n.Uint16(data[offsetRuleID:])
	timeBoot := n.Uint64(data[offsetTimeBoot:])
	capLen := n.Uint32(data[offsetCaptureLength:])
	origLen := n.Uint32(data[offsetOriginalLength:])

	// data may contain trailing garbage from the perf ring buffer
	// https://lore.kernel.org/patchwork/patch/1244339/
	packetEnd := monitor.RecorderCaptureLen + capLen
	if dataLen < packetEnd {
		return record{}, fmt.Errorf("capture record too short: want:%d < got:%d", dataLen, packetEnd)
	}
	packet := data[monitor.RecorderCaptureLen:packetEnd]

	return record{
		timestamp: time.Unix(0, d.bootTimeOffset+int64(timeBoot)),
		ruleID:    ruleID,
		inclLen:   capLen,
		origLen:   origLen,
		data:      packet,
	}, nil
}

const estimationRounds = 25

func estimateBootTimeOffset() (bootTimeOffset int64, err error) {
	// The datapath is currently using ktime_get_boot_ns for the pcap timestamp,
	// which corresponds to CLOCK_BOOTTIME. To convert CLOCK_BOOTTIME to
	// CLOCK_REALTIME (i.e. a unix timestamp), we need to estimate the offset
	// between the two clocks.

	// There can be an arbitrary amount of time between the execution of
	// time.Now() and unix.ClockGettime() below, especially under scheduler
	// pressure during program startup. To reduce the error introduced by these
	// delays, we pin the current Go routine to its OS thread and measure the
	// clocks multiple times, taking only the smallest observed difference
	// between the two values (which implies the smallest possible delay
	// between the two snapshots).
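	// minDiff tracks the smallest absolute offset observed so far across the
	// estimation rounds; bootTimeOffset is updated to the offset measured in
	// the round that produced it.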
	var minDiff int64 = 1<<63 - 1

	runtime.LockOSThread()
	defer runtime.UnlockOSThread()
	for round := 0; round < estimationRounds; round++ {
		var bootTimespec unix.Timespec

		// Ideally we would use __vdso_clock_gettime for both clocks here,
		// to have as little overhead as possible.
		// time.Now() will actually use VDSO on Go 1.9+, but calling
		// unix.ClockGettime to obtain CLOCK_BOOTTIME is a regular system call
		// for now.
		unixTime := time.Now()
		err = unix.ClockGettime(unix.CLOCK_BOOTTIME, &bootTimespec)
		if err != nil {
			return 0, err
		}

		offset := unixTime.UnixNano() - bootTimespec.Nano()
		diff := offset
		if diff < 0 {
			diff = -diff
		}

		if diff < minDiff {
			minDiff = diff
			bootTimeOffset = offset
		}
	}

	return bootTimeOffset, nil
}

// NotifyPerfEvent implements consumer.MonitorConsumer
func (d *Dispatch) NotifyPerfEvent(data []byte, cpu int) {
	if len(data) == 0 || data[0] != monitorAPI.MessageTypeRecCapture {
		return
	}

	d.mutex.Lock()
	defer d.mutex.Unlock()

	rec, err := d.decodeRecordCaptureLocked(data)
	if err != nil {
		log.WithError(err).Warning("Failed to parse capture record")
		return
	}

	// We silently drop records with unknown rule ids
	if s, ok := d.sinkByRuleID[rec.ruleID]; ok {
		s.enqueue(rec)
	}
}

// NotifyPerfEventLost implements consumer.MonitorConsumer
func (d *Dispatch) NotifyPerfEventLost(numLostEvents uint64, cpu int) {
	log.WithFields(logrus.Fields{
		"numEvents": numLostEvents,
		"cpu":       cpu,
	}).Warning("Perf ring buffer events lost. This may affect captured packets.")
}

// NotifyAgentEvent implements consumer.MonitorConsumer
func (d *Dispatch) NotifyAgentEvent(typ int, message interface{}) {
	// ignored
}
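
// runRecorderSinkSketch is a minimal usage sketch, not part of the upstream
// dispatch.go. It assumes the caller already owns a pcap.RecordWriter
// implementation and a matching pcap.Header, and it only exercises the API
// defined above (NewDispatch, StartSink and the returned Handle). Wiring the
// Dispatch into the monitor agent so that NotifyPerfEvent actually receives
// capture events is out of scope here; the queue size, rule ID and stop
// condition values are arbitrary illustration values.
func runRecorderSinkSketch(ctx context.Context, w pcap.RecordWriter, hdr pcap.Header) error {
	// One dispatcher is shared by all sinks; each registered sink may queue
	// up to 1024 pending captures.
	d, err := NewDispatch(1024)
	if err != nil {
		return err
	}

	// Start a sink for rule ID 1. The header is written to w during sink
	// initialization; captured packets follow as they arrive via
	// NotifyPerfEvent.
	handle, err := d.StartSink(ctx, PcapSink{
		RuleID: 1,
		Header: hdr,
		Writer: w,
		StopCondition: StopConditions{
			PacketsCaptured: 1000,        // stop after 1000 packets ...
			BytesCaptured:   1024 * 1024, // ... or 1 MiB, whichever is hit first
		},
	})
	if err != nil {
		return err
	}

	// Observe the sink until it stops; Handle.Done is closed in all cases,
	// and Handle.Err reports whether it stopped due to an error.
	for {
		select {
		case <-handle.StatsUpdated:
			log.WithField("stats", handle.Stats()).Debug("sink statistics updated")
		case <-handle.Done:
			return handle.Err()
		}
	}
}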