// Copyright 2012 Google, Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the LICENSE file in the root of the source
// tree.

//go:build linux
// +build linux

// Package afpacket provides Go bindings for MMap'd AF_PACKET socket reading.
package afpacket

// Couldn't have done this without:
//   http://lxr.free-electrons.com/source/Documentation/networking/packet_mmap.txt
//   http://codemonkeytips.blogspot.co.uk/2011/07/asynchronous-packet-socket-reading-with.html

import (
	"errors"
	"fmt"
	"net"
	"runtime"
	"sync"
	"sync/atomic"
	"syscall"
	"time"
	"unsafe"

	"golang.org/x/net/bpf"
	"golang.org/x/sys/unix"

	"github.com/gopacket/gopacket"
)

// pageSize is the system memory page size, queried once at package load.
var pageSize = unix.Getpagesize()

// ErrPoll is returned when poll reports an error condition (POLLERR) on the socket.
var ErrPoll = errors.New("packet poll failed")

// ErrTimeout is returned when poll returns without any ready descriptor,
// i.e. the poll timeout expired.
var ErrTimeout = errors.New("packet poll timeout expired")

// AncillaryVLAN structures are used to pass the captured VLAN
// as ancillary data via CaptureInfo.
type AncillaryVLAN struct {
	// The VLAN VID provided by the kernel.
	VLAN int
}

// Stats is a set of counters detailing the work TPacket has done so far.
type Stats struct {
	// Packets is the total number of packets returned to the caller.
	Packets int64
	// Polls is the number of blocking syscalls made waiting for packets.
	// This should always be <= Packets, since with TPacket one syscall
	// can (and often does) return many results.
	Polls int64
}

// TpacketReqv2 is the ring request handed to the PACKET_RX_RING socket option
// when TPacket version 1 or 2 is in use. Its memory is passed to the kernel
// as-is, so the field order and sizes must not change.
type TpacketReqv2 struct {
	// blockSize is the size of one ring block.
	blockSize uint32
	// blockNr is the number of blocks in the ring.
	blockNr uint32
	// frameSize is the size of one frame.
	frameSize uint32
	// frameNr is the total number of frames in the ring.
	frameNr uint32
}

// TpacketReqv3 is the ring request handed to the PACKET_RX_RING socket option
// when TPacket version 3 is in use. Its memory is passed to the kernel as-is,
// so the field order and sizes must not change.
type TpacketReqv3 struct {
	// blockSize is the size of one ring block.
	blockSize uint32
	// blockNr is the number of blocks in the ring.
	blockNr uint32
	// frameSize is the size of one frame.
	frameSize uint32
	// frameNr is the total number of frames in the ring.
	frameNr uint32
	// retireBlkTov is the block timeout, filled in by setUpRing in milliseconds.
	retireBlkTov uint32
	// sizeOfPriv is left at zero by setUpRing.
	sizeOfPriv uint32
	// featureReqWord is left at zero by setUpRing.
	featureReqWord uint32
}

// SocketStats is a struct where socket stats are stored
type SocketStats struct {
	packets uint32
	drops   uint32
}

// Packets returns the number of packets seen by this socket.
func (s *SocketStats) Packets() uint {
	return uint(s.packets)
}

// Drops returns the number of packets dropped on this socket.
func (s *SocketStats) Drops() uint {
	return uint(s.drops)
}

// SocketStatsV3 is a struct where socket stats for TPacketV3 are stored
type SocketStatsV3 struct {
	packets      uint32
	drops        uint32
	freezeQCount uint32
}

// Packets returns the number of packets seen by this socket.
func (s *SocketStatsV3) Packets() uint {
	return uint(s.packets)
}

// Drops returns the number of packets dropped on this socket.
func (s *SocketStatsV3) Drops() uint {
	return uint(s.drops)
}

// QueueFreezes returns the number of queue freezes on this socket.
func (s *SocketStatsV3) QueueFreezes() uint {
	return uint(s.freezeQCount)
}

// TPacket implements packet receiving for Linux AF_PACKET versions 1, 2, and 3.
type TPacket struct {
	// stats is simple statistics on TPacket's run. This MUST be the first entry to ensure alignment for sync/atomic
	stats Stats
	// fd is the C file descriptor.
	fd int
	// ring points to the memory space of the ring buffer shared by tpacket and the kernel.
	ring []byte
	// rawring is the unsafe pointer that we use to poll for packets
	rawring unsafe.Pointer
	// opts contains read-only options for the TPacket object.
	opts options
	mu   sync.Mutex // guards below
	// offset is the offset into the ring of the current header.
	offset int
	// current is the current header.
	current header
	// shouldReleasePacket is set to true whenever we return packet data, to make sure we remember to release that data back to the kernel.
	shouldReleasePacket bool
	// headerNextNeeded is set to true when the header needs to move to the next packet. There is no need to move it in case of a poll error.
	headerNextNeeded bool
	// tpVersion is the version of TPacket actually in use, set by setRequestedTPacketVersion.
	tpVersion OptTPacketVersion
	// Hackity hack hack hack.  We need to return a pointer to the header with
	// getTPacketHeader, and we don't want to allocate a v3wrapper every time,
	// so we leave it in the TPacket object and return a pointer to it.
	v3 v3wrapper

	statsMu sync.Mutex // guards stats below
	// socketStats contains stats from the socket
	socketStats SocketStats
	// same as socketStats, but with an extra field freeze_q_cnt
	socketStatsV3 SocketStatsV3
}

// Compile-time check that *TPacket satisfies gopacket.ZeroCopyPacketDataSource.
var _ gopacket.ZeroCopyPacketDataSource = &TPacket{}

// bindToInterface binds the TPacket socket to a particular named interface.
// An empty ifaceName binds with interface index 0. It returns an error if the
// interface name cannot be resolved or if the bind call fails.
func (h *TPacket) bindToInterface(ifaceName string) error {
	ifIndex := 0
	// An empty string here means to listen to all interfaces
	if ifaceName != "" {
		iface, err := net.InterfaceByName(ifaceName)
		if err != nil {
			return fmt.Errorf("InterfaceByName: %v", err)
		}
		ifIndex = iface.Index
	}
	s := &unix.SockaddrLinklayer{
		Protocol: htons(uint16(unix.ETH_P_ALL)),
		Ifindex:  ifIndex,
	}
	return unix.Bind(h.fd, s)
}

// setTPacketVersion asks the kernel to set TPacket to a particular version, and returns an error on failure.
170 func (h *TPacket) setTPacketVersion(version OptTPacketVersion) error { 171 if err := unix.SetsockoptInt(h.fd, unix.SOL_PACKET, unix.PACKET_VERSION, int(version)); err != nil { 172 return fmt.Errorf("setsockopt packet_version: %v", err) 173 } 174 return nil 175 } 176 177 // setRequestedTPacketVersion tries to set TPacket to the requested version or versions. 178 func (h *TPacket) setRequestedTPacketVersion() error { 179 switch { 180 case (h.opts.version == TPacketVersionHighestAvailable || h.opts.version == TPacketVersion3) && h.setTPacketVersion(TPacketVersion3) == nil: 181 h.tpVersion = TPacketVersion3 182 case (h.opts.version == TPacketVersionHighestAvailable || h.opts.version == TPacketVersion2) && h.setTPacketVersion(TPacketVersion2) == nil: 183 h.tpVersion = TPacketVersion2 184 case (h.opts.version == TPacketVersionHighestAvailable || h.opts.version == TPacketVersion1) && h.setTPacketVersion(TPacketVersion1) == nil: 185 h.tpVersion = TPacketVersion1 186 default: 187 return errors.New("no known tpacket versions work on this machine") 188 } 189 return nil 190 } 191 192 // setUpRing sets up the shared-memory ring buffer between the user process and the kernel. 
193 func (h *TPacket) setUpRing() (err error) { 194 totalSize := int(h.opts.framesPerBlock * h.opts.numBlocks * h.opts.frameSize) 195 switch h.tpVersion { 196 case TPacketVersion1, TPacketVersion2: 197 var tp TpacketReqv2 198 tp.blockSize = uint32(h.opts.blockSize) 199 tp.blockNr = uint32(h.opts.numBlocks) 200 tp.frameSize = uint32(h.opts.frameSize) 201 tp.frameNr = uint32(h.opts.framesPerBlock * h.opts.numBlocks) 202 if err := setsockopt(h.fd, unix.SOL_PACKET, unix.PACKET_RX_RING, unsafe.Pointer(&tp), unsafe.Sizeof(tp)); err != nil { 203 return fmt.Errorf("setsockopt packet_rx_ring: %v", err) 204 } 205 case TPacketVersion3: 206 var tp TpacketReqv3 207 tp.blockSize = uint32(h.opts.blockSize) 208 tp.blockNr = uint32(h.opts.numBlocks) 209 tp.frameSize = uint32(h.opts.frameSize) 210 tp.frameNr = uint32(h.opts.framesPerBlock * h.opts.numBlocks) 211 tp.retireBlkTov = uint32(h.opts.blockTimeout / time.Millisecond) 212 if err := setsockopt(h.fd, unix.SOL_PACKET, unix.PACKET_RX_RING, unsafe.Pointer(&tp), unsafe.Sizeof(tp)); err != nil { 213 return fmt.Errorf("setsockopt packet_rx_ring v3: %v", err) 214 } 215 default: 216 return errors.New("invalid tpVersion") 217 } 218 h.ring, err = unix.Mmap(h.fd, 0, totalSize, unix.PROT_READ|unix.PROT_WRITE, unix.MAP_SHARED) 219 if err != nil { 220 fmt.Println("here") 221 return err 222 } 223 if h.ring == nil { 224 return errors.New("no ring") 225 } 226 h.rawring = unsafe.Pointer(&h.ring[0]) 227 return nil 228 } 229 230 // Close cleans up the TPacket. It should not be used after the Close call. 231 func (h *TPacket) Close() { 232 if h.fd == -1 { 233 return // already closed. 234 } 235 if h.ring != nil { 236 unix.Munmap(h.ring) 237 } 238 h.ring = nil 239 unix.Close(h.fd) 240 h.fd = -1 241 runtime.SetFinalizer(h, nil) 242 } 243 244 // NewTPacket returns a new TPacket object for reading packets off the wire. 245 // Its behavior may be modified by passing in any/all of afpacket.Opt* to this 246 // function. 
247 // If this function succeeds, the user should be sure to Close the returned 248 // TPacket when finished with it. 249 func NewTPacket(opts ...interface{}) (h *TPacket, err error) { 250 h = &TPacket{} 251 if h.opts, err = parseOptions(opts...); err != nil { 252 return nil, err 253 } 254 fd, err := unix.Socket(unix.AF_PACKET, int(h.opts.socktype), int(htons(unix.ETH_P_ALL))) 255 if err != nil { 256 return nil, err 257 } 258 h.fd = fd 259 if err = h.bindToInterface(h.opts.iface); err != nil { 260 goto errlbl 261 } 262 if err = h.setRequestedTPacketVersion(); err != nil { 263 goto errlbl 264 } 265 if err = h.setUpRing(); err != nil { 266 goto errlbl 267 } 268 // Clear stat counter from socket 269 if err = h.InitSocketStats(); err != nil { 270 goto errlbl 271 } 272 runtime.SetFinalizer(h, (*TPacket).Close) 273 return h, nil 274 errlbl: 275 h.Close() 276 return nil, err 277 } 278 279 // SetBPF attaches a BPF filter to the underlying socket 280 func (h *TPacket) SetBPF(filter []bpf.RawInstruction) error { 281 var p unix.SockFprog 282 if len(filter) > int(^uint16(0)) { 283 return errors.New("filter too large") 284 } 285 p.Len = uint16(len(filter)) 286 p.Filter = (*unix.SockFilter)(unsafe.Pointer(&filter[0])) 287 288 return setsockopt(h.fd, unix.SOL_SOCKET, unix.SO_ATTACH_FILTER, unsafe.Pointer(&p), unix.SizeofSockFprog) 289 } 290 291 // attach ebpf filter to af-packet 292 func (h *TPacket) SetEBPF(progFd int32) error { 293 return setsockopt(h.fd, unix.SOL_SOCKET, unix.SO_ATTACH_BPF, unsafe.Pointer(&progFd), 4) 294 } 295 296 func (h *TPacket) releaseCurrentPacket() error { 297 h.current.clearStatus() 298 h.offset++ 299 h.shouldReleasePacket = false 300 return nil 301 } 302 303 // ZeroCopyReadPacketData reads the next packet off the wire, and returns its data. 304 // The slice returned by ZeroCopyReadPacketData points to bytes owned by the 305 // TPacket. Each call to ZeroCopyReadPacketData invalidates any data previously 306 // returned by ZeroCopyReadPacketData. 
// Care must be taken not to keep pointers
// to old bytes when using ZeroCopyReadPacketData... if you need to keep data past
// the next time you call ZeroCopyReadPacketData, use ReadPacketData, which copies
// the bytes into a new buffer for you.
//
//	tp, _ := NewTPacket(...)
//	data1, _, _ := tp.ZeroCopyReadPacketData()
//	// do everything you want with data1 here, copying bytes out of it if you'd like to keep them around.
//	data2, _, _ := tp.ZeroCopyReadPacketData()  // invalidates bytes in data1
func (h *TPacket) ZeroCopyReadPacketData() (data []byte, ci gopacket.CaptureInfo, err error) {
	// h.mu is held for the whole call and released on each of the two exits.
	h.mu.Lock()
retry:
	// A fresh header is fetched when there is no current header, when the
	// previous call did not hand out a packet (headerNextNeeded is false),
	// or when h.current.next() returns false.
	if h.current == nil || !h.headerNextNeeded || !h.current.next() {
		// Hand the previously returned data back before moving on.
		if h.shouldReleasePacket {
			h.releaseCurrentPacket()
		}
		h.current = h.getTPacketHeader()
		if err = h.pollForFirstPacket(h.current); err != nil {
			// Nothing was returned to the caller, so the next call must
			// not advance the header.
			h.headerNextNeeded = false
			h.mu.Unlock()
			return
		}
		// We received an empty block
		if h.current.getLength() == 0 {
			goto retry
		}
	}
	data = h.current.getData(&h.opts)
	ci.Timestamp = h.current.getTime()
	ci.CaptureLength = len(data)
	ci.Length = h.current.getLength()
	ci.InterfaceIndex = h.current.getIfaceIndex()
	// A negative value from getVLAN is not reported as ancillary data.
	vlan := h.current.getVLAN()
	if vlan >= 0 {
		ci.AncillaryData = append(ci.AncillaryData, AncillaryVLAN{vlan})
	}
	atomic.AddInt64(&h.stats.Packets, 1)
	h.headerNextNeeded = true
	h.mu.Unlock()

	return
}

// Stats returns statistics on the packets the TPacket has seen so far.
// The counters are read atomically and the returned error is always nil.
func (h *TPacket) Stats() (Stats, error) {
	return Stats{
		Polls:   atomic.LoadInt64(&h.stats.Polls),
		Packets: atomic.LoadInt64(&h.stats.Packets),
	}, nil
}

// InitSocketStats clears the socket counters and resets the stats saved on the TPacket.
358 func (h *TPacket) InitSocketStats() error { 359 if h.tpVersion == TPacketVersion3 { 360 var ssv3 SocketStatsV3 361 slt := uint32(unsafe.Sizeof(ssv3)) 362 363 err := getsockopt(h.fd, unix.SOL_PACKET, unix.PACKET_STATISTICS, unsafe.Pointer(&ssv3), uintptr(unsafe.Pointer(&slt))) 364 if err != nil { 365 return err 366 } 367 h.socketStatsV3 = SocketStatsV3{} 368 } else { 369 var ss SocketStats 370 slt := uint32(unsafe.Sizeof(ss)) 371 372 err := getsockopt(h.fd, unix.SOL_PACKET, unix.PACKET_STATISTICS, unsafe.Pointer(&ss), uintptr(unsafe.Pointer(&slt))) 373 if err != nil { 374 return err 375 } 376 h.socketStats = SocketStats{} 377 } 378 return nil 379 } 380 381 // SocketStats saves stats from the socket to the TPacket instance. 382 func (h *TPacket) SocketStats() (SocketStats, SocketStatsV3, error) { 383 h.statsMu.Lock() 384 defer h.statsMu.Unlock() 385 // We need to save the counters since asking for the stats will clear them 386 if h.tpVersion == TPacketVersion3 { 387 var ssv3 SocketStatsV3 388 slt := uint32(unsafe.Sizeof(ssv3)) 389 390 err := getsockopt(h.fd, unix.SOL_PACKET, unix.PACKET_STATISTICS, unsafe.Pointer(&ssv3), uintptr(unsafe.Pointer(&slt))) 391 if err != nil { 392 return SocketStats{}, SocketStatsV3{}, err 393 } 394 395 h.socketStatsV3.packets += ssv3.packets 396 h.socketStatsV3.drops += ssv3.drops 397 h.socketStatsV3.freezeQCount += ssv3.freezeQCount 398 return h.socketStats, h.socketStatsV3, nil 399 } 400 var ss SocketStats 401 slt := uint32(unsafe.Sizeof(ss)) 402 403 err := getsockopt(h.fd, unix.SOL_PACKET, unix.PACKET_STATISTICS, unsafe.Pointer(&ss), uintptr(unsafe.Pointer(&slt))) 404 if err != nil { 405 return SocketStats{}, SocketStatsV3{}, err 406 } 407 408 h.socketStats.packets += ss.packets 409 h.socketStats.drops += ss.drops 410 return h.socketStats, h.socketStatsV3, nil 411 } 412 413 // ReadPacketDataTo reads packet data into a user-supplied buffer. 414 // This function reads up to the length of the passed-in slice. 
415 // The number of bytes read into data will be returned in ci.CaptureLength, 416 // which is the minimum of the size of the passed-in buffer and the size of 417 // the captured packet. 418 func (h *TPacket) ReadPacketDataTo(data []byte) (ci gopacket.CaptureInfo, err error) { 419 var d []byte 420 d, ci, err = h.ZeroCopyReadPacketData() 421 if err != nil { 422 return 423 } 424 ci.CaptureLength = copy(data, d) 425 return 426 } 427 428 // ReadPacketData reads the next packet, copies it into a new buffer, and returns 429 // that buffer. Since the buffer is allocated by ReadPacketData, it is safe for long-term 430 // use. This implements gopacket.PacketDataSource. 431 func (h *TPacket) ReadPacketData() (data []byte, ci gopacket.CaptureInfo, err error) { 432 var d []byte 433 d, ci, err = h.ZeroCopyReadPacketData() 434 if err != nil { 435 return 436 } 437 data = make([]byte, len(d)) 438 copy(data, d) 439 return 440 } 441 442 func (h *TPacket) getTPacketHeader() header { 443 switch h.tpVersion { 444 case TPacketVersion1: 445 if h.offset >= h.opts.framesPerBlock*h.opts.numBlocks { 446 h.offset = 0 447 } 448 position := uintptr(h.rawring) + uintptr(h.opts.frameSize*h.offset) 449 return (*v1header)(unsafe.Pointer(position)) 450 case TPacketVersion2: 451 if h.offset >= h.opts.framesPerBlock*h.opts.numBlocks { 452 h.offset = 0 453 } 454 position := uintptr(h.rawring) + uintptr(h.opts.frameSize*h.offset) 455 return (*v2header)(unsafe.Pointer(position)) 456 case TPacketVersion3: 457 // TPacket3 uses each block to return values, instead of each frame. Hence we need to rotate when we hit #blocks, not #frames. 
458 if h.offset >= h.opts.numBlocks { 459 h.offset = 0 460 } 461 position := uintptr(h.rawring) + uintptr(h.opts.frameSize*h.offset*h.opts.framesPerBlock) 462 h.v3 = initV3Wrapper(unsafe.Pointer(position)) 463 return &h.v3 464 } 465 panic("handle tpacket version is invalid") 466 } 467 468 func (h *TPacket) pollForFirstPacket(hdr header) error { 469 tm := int(h.opts.pollTimeout / time.Millisecond) 470 for hdr.getStatus()&unix.TP_STATUS_USER == 0 { 471 pollset := [1]unix.PollFd{ 472 { 473 Fd: int32(h.fd), 474 Events: unix.POLLIN, 475 }, 476 } 477 n, err := unix.Poll(pollset[:], tm) 478 if n == 0 { 479 return ErrTimeout 480 } 481 482 atomic.AddInt64(&h.stats.Polls, 1) 483 if pollset[0].Revents&unix.POLLERR > 0 { 484 return ErrPoll 485 } 486 if err == syscall.EINTR { 487 continue 488 } 489 if err != nil { 490 return err 491 } 492 } 493 494 h.shouldReleasePacket = true 495 return nil 496 } 497 498 // FanoutType determines the type of fanout to use with a TPacket SetFanout call. 499 type FanoutType int 500 501 // FanoutType values. 502 const ( 503 FanoutHash FanoutType = unix.PACKET_FANOUT_HASH 504 // It appears that defrag only works with FanoutHash, see: 505 // http://lxr.free-electrons.com/source/net/packet/af_packet.c#L1204 506 FanoutHashWithDefrag FanoutType = unix.PACKET_FANOUT_FLAG_DEFRAG 507 FanoutLoadBalance FanoutType = unix.PACKET_FANOUT_LB 508 FanoutCPU FanoutType = unix.PACKET_FANOUT_CPU 509 FanoutRollover FanoutType = unix.PACKET_FANOUT_ROLLOVER 510 FanoutRandom FanoutType = unix.PACKET_FANOUT_RND 511 FanoutQueueMapping FanoutType = unix.PACKET_FANOUT_QM 512 FanoutCBPF FanoutType = unix.PACKET_FANOUT_CBPF 513 FanoutEBPF FanoutType = unix.PACKET_FANOUT_EBPF 514 ) 515 516 // SetFanout activates TPacket's fanout ability. 517 // Use of Fanout requires creating multiple TPacket objects and the same id/type to 518 // a SetFanout call on each. 
Note that this can be done cross-process, so if two 519 // different processes both call SetFanout with the same type/id, they'll share 520 // packets between them. The same should work for multiple TPacket objects within 521 // the same process. 522 func (h *TPacket) SetFanout(t FanoutType, id uint16) error { 523 h.mu.Lock() 524 defer h.mu.Unlock() 525 arg := int(t) << 16 526 arg |= int(id) 527 return setsockopt(h.fd, unix.SOL_PACKET, unix.PACKET_FANOUT, unsafe.Pointer(&arg), unsafe.Sizeof(arg)) 528 } 529 530 // WritePacketData transmits a raw packet. 531 func (h *TPacket) WritePacketData(pkt []byte) error { 532 _, err := unix.Write(h.fd, pkt) 533 return err 534 }