github.com/jayanthvn/pure-gobpf@v0.0.0-20230623131354-8d1d959d9e0b/pkg/ebpf_events/ring.go (about) 1 // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"). 4 // You may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 //limitations under the License. 14 15 package ebpf_events 16 17 import ( 18 "encoding/binary" 19 "fmt" 20 "os" 21 "sync" 22 "sync/atomic" 23 "syscall" 24 "unsafe" 25 26 "github.com/jayanthvn/pure-gobpf/pkg/ebpf_maps" 27 "github.com/jayanthvn/pure-gobpf/pkg/logger" 28 "golang.org/x/sys/unix" 29 ) 30 31 var log = logger.Get() 32 33 const ( 34 BPF_MAP_TYPE_RINGBUF = 27 35 ) 36 37 type Ring struct { 38 Consumer_pos unsafe.Pointer 39 Consumer []byte 40 Producer_pos unsafe.Pointer 41 Producer []byte 42 Mask uint64 43 RingBufferMapFD int 44 Data unsafe.Pointer 45 } 46 47 type RingBuffer struct { 48 EpollEvent []unix.EpollEvent 49 Rings []*Ring 50 PageSize int 51 EpollFD int 52 RingCnt int 53 stopRingBufferChan chan struct{} 54 updateRingBufferChan chan *Ring 55 eventsStopChannel chan struct{} 56 wg sync.WaitGroup 57 epoll_wg sync.WaitGroup 58 eventsDataChannel chan []byte 59 } 60 61 func InitRingBuffer(mapFD int) (<-chan []byte, error) { 62 //var log = logger.Get() 63 if mapFD == -1 { 64 return nil, fmt.Errorf("Invalid map FD") 65 } 66 mapInfo, err := ebpf_maps.GetBPFmapInfo(mapFD) 67 if err != nil { 68 return nil, fmt.Errorf("Failed to map info") 69 } 70 log.Infof("Got map FD %d", mapFD) 71 if mapInfo.Type != BPF_MAP_TYPE_RINGBUF { 72 return nil, fmt.Errorf("Unsupported map type, should be - BPF_MAP_TYPE_RINGBUF") 73 } 74 75 rb := &RingBuffer{ 76 PageSize: os.Getpagesize(), 77 EpollFD: -1, 78 RingCnt: 0, 79 } 80 81 rb.EpollFD, err = unix.EpollCreate1(unix.EPOLL_CLOEXEC) 82 if err != nil { 83 return nil, fmt.Errorf("Failed to create epoll instance: %s", err) 84 } 85 86 eventsChan, err := rb.SetupRingBuffer(mapFD, mapInfo.MaxEntries) 87 if err != nil { 88 rb.CleanupRingBuffer() 89 return nil, fmt.Errorf("Failed to add ring buffer: %s", err) 90 } 91 log.Infof("Ringbuffer setup done") 92 return eventsChan, nil 93 } 94 95 func (rb *RingBuffer) SetupRingBuffer(mapFD int, maxEntries uint32) (<-chan []byte, error) { 96 ring := &Ring{ 97 RingBufferMapFD: mapFD, 98 Mask: uint64(maxEntries - 1), 99 } 100 101 // [Consumer page - 4k][Producer page - 4k][Data section - twice the size of max entries] 102 // Refer kernel code, twice the size of max entries will help in boundary scenarios 103 104 tmp, err := unix.Mmap(mapFD, 0, rb.PageSize, unix.PROT_READ|unix.PROT_WRITE, unix.MAP_SHARED) 105 if err != nil { 106 return nil, fmt.Errorf("Failed to create Mmap for consumer -> %d: %s", mapFD, err) 107 } 108 109 ring.Consumer_pos = unsafe.Pointer(&tmp[0]) 110 ring.Consumer = tmp 111 112 mmap_sz := uint32(rb.PageSize) + 2*maxEntries 113 tmp, err = unix.Mmap(mapFD, int64(rb.PageSize), int(mmap_sz), unix.PROT_READ, unix.MAP_SHARED) 114 if err != nil { 115 unix.Munmap(tmp) 116 return nil, fmt.Errorf("Failed to create Mmap for producer -> %d: %s", mapFD, err) 117 } 118 119 ring.Producer_pos = unsafe.Pointer(&tmp[0]) 120 ring.Producer = tmp 121 ring.Data = unsafe.Pointer(uintptr(unsafe.Pointer(&tmp[0])) + uintptr(rb.PageSize)) 122 123 epollEvent := unix.EpollEvent{ 124 Events: unix.EPOLLIN, 125 Fd: int32(rb.RingCnt), 126 } 127 128 err = unix.EpollCtl(rb.EpollFD, unix.EPOLL_CTL_ADD, mapFD, &epollEvent) 129 if err != nil { 130 unix.Munmap(tmp) 131 return nil, fmt.Errorf("Failed to Epoll event: %s", err) 132 } 133 134 rb.Rings = append(rb.Rings, ring) 135 rb.EpollEvent = append(rb.EpollEvent, epollEvent) 136 rb.RingCnt++ 137 138 //8. Start channels read 139 rb.eventsStopChannel = make(chan struct{}) 140 rb.eventsDataChannel = make(chan []byte) 141 142 rb.wg.Add(1) 143 go rb.reconcileEventsDataChannel() 144 return rb.eventsDataChannel, nil 145 } 146 147 func (rb *RingBuffer) CleanupRingBuffer() { 148 149 for i := 0; i < rb.RingCnt; i++ { 150 _ = unix.Munmap(rb.Rings[i].Producer) 151 _ = unix.Munmap(rb.Rings[i].Consumer) 152 rb.Rings[i].Producer_pos = nil 153 rb.Rings[i].Consumer_pos = nil 154 } 155 156 if rb.EpollFD >= 0 { 157 _ = syscall.Close(rb.EpollFD) 158 } 159 rb.EpollEvent = nil 160 rb.Rings = nil 161 return 162 } 163 164 func (rb *RingBuffer) reconcileEventsDataChannel() { 165 166 pollerCh := rb.EpollStart() 167 defer func() { 168 rb.wg.Done() 169 }() 170 171 for { 172 select { 173 case buffer, ok := <-pollerCh: 174 175 if !ok { 176 return 177 } 178 rb.ReadRingBuffer(buffer) 179 180 case <-rb.eventsStopChannel: 181 return 182 } 183 } 184 } 185 186 func (rb *RingBuffer) EpollStart() <-chan *Ring { 187 188 rb.stopRingBufferChan = make(chan struct{}) 189 rb.updateRingBufferChan = make(chan *Ring) 190 rb.epoll_wg.Add(1) 191 go rb.eventsPoller() 192 193 return rb.updateRingBufferChan 194 } 195 196 func (rb *RingBuffer) eventsPoller() { 197 defer rb.epoll_wg.Done() 198 for { 199 select { 200 case <-rb.stopRingBufferChan: 201 return 202 default: 203 break 204 } 205 numEvents := rb.poll(rb.EpollEvent[:rb.RingCnt]) 206 for _, event := range rb.EpollEvent[:numEvents] { 207 select { 208 case rb.updateRingBufferChan <- rb.Rings[int(event.Fd)]: 209 210 case <-rb.stopRingBufferChan: 211 return 212 } 213 } 214 } 215 } 216 217 func (rb *RingBuffer) poll(events []unix.EpollEvent) int { 218 219 timeoutMs := 150 220 n, err := unix.EpollWait(rb.EpollFD, events, timeoutMs) 221 if err != nil { 222 return 0 223 } 224 return n 225 } 226 227 func (r *Ring) getConsumerPosition() uint64 { 228 return atomic.LoadUint64((*uint64)(r.Consumer_pos)) 229 } 230 231 func (r *Ring) getProducerPosition() uint64 { 232 return atomic.LoadUint64((*uint64)(r.Producer_pos)) 233 234 } 235 236 var ringbufHeaderSize = binary.Size(ringbufHeader{}) 237 238 // ringbufHeader from 'struct bpf_ringbuf_hdr' in kernel/bpf/ringbuf.c 239 type ringbufHeader struct { 240 Len uint32 241 PgOff uint32 242 } 243 244 func memcpy(dst, src unsafe.Pointer, count uintptr) { 245 for i := uintptr(0); i < count; i++ { 246 b := *(*byte)(unsafe.Pointer(uintptr(src) + i)) 247 *(*byte)(unsafe.Pointer(uintptr(dst) + i)) = b 248 } 249 } 250 251 // Similar to libbpf poll buffer 252 func (rb *RingBuffer) ReadRingBuffer(eventRing *Ring) { 253 var done bool 254 cons_pos := eventRing.getConsumerPosition() 255 for { 256 done = true 257 prod_pos := eventRing.getProducerPosition() 258 for cons_pos < prod_pos { 259 260 //Get the header - Data points to the DataPage which will be offset by cons_pos 261 buf := (*int32)(unsafe.Pointer(uintptr(eventRing.Data) + (uintptr(cons_pos) & uintptr(eventRing.Mask)))) 262 263 //Get the len which is uint32 in header struct 264 Hdrlen := atomic.LoadInt32(buf) 265 266 //Check if busy then skip 267 if uint32(Hdrlen)&unix.BPF_RINGBUF_BUSY_BIT != 0 { 268 done = true 269 break 270 } 271 272 done = false 273 274 // Len in ringbufHeader has busy and discard bit so skip it 275 dataLen := (((uint32(Hdrlen) << 2) >> 2) + uint32(ringbufHeaderSize)) 276 //round up dataLen to nearest 8-byte alignment 277 roundedDataLen := (dataLen + 7) &^ 7 278 279 cons_pos += uint64(roundedDataLen) 280 281 if uint32(Hdrlen)&unix.BPF_RINGBUF_DISCARD_BIT == 0 { 282 readableSample := unsafe.Pointer(uintptr(unsafe.Pointer(buf)) + uintptr(ringbufHeaderSize)) 283 dataBuf := make([]byte, int(roundedDataLen)) 284 memcpy(unsafe.Pointer(&dataBuf[0]), readableSample, uintptr(roundedDataLen)) 285 rb.eventsDataChannel <- dataBuf 286 } 287 288 atomic.StoreUint64((*uint64)(eventRing.Consumer_pos), cons_pos) 289 } 290 if done { 291 break 292 } 293 } 294 }