github.com/gopacket/gopacket@v1.1.0/afpacket/afpacket.go (about)

     1  // Copyright 2012 Google, Inc. All rights reserved.
     2  //
     3  // Use of this source code is governed by a BSD-style license
     4  // that can be found in the LICENSE file in the root of the source
     5  // tree.
     6  
     7  //go:build linux
     8  // +build linux
     9  
    10  // Package afpacket provides Go bindings for MMap'd AF_PACKET socket reading.
    11  package afpacket
    12  
    13  // Couldn't have done this without:
    14  // http://lxr.free-electrons.com/source/Documentation/networking/packet_mmap.txt
    15  // http://codemonkeytips.blogspot.co.uk/2011/07/asynchronous-packet-socket-reading-with.html
    16  
    17  import (
    18  	"errors"
    19  	"fmt"
    20  	"net"
    21  	"runtime"
    22  	"sync"
    23  	"sync/atomic"
    24  	"syscall"
    25  	"time"
    26  	"unsafe"
    27  
    28  	"golang.org/x/net/bpf"
    29  	"golang.org/x/sys/unix"
    30  
    31  	"github.com/gopacket/gopacket"
    32  )
    33  
    34  var pageSize = unix.Getpagesize()
    35  
    36  // ErrPoll returned by poll
    37  var ErrPoll = errors.New("packet poll failed")
    38  
    39  // ErrTimeout returned on poll timeout
    40  var ErrTimeout = errors.New("packet poll timeout expired")
    41  
// AncillaryVLAN structures are used to pass the captured VLAN
// as ancillary data via CaptureInfo. ZeroCopyReadPacketData appends one to
// CaptureInfo.AncillaryData whenever the packet header reports a
// non-negative VLAN value.
type AncillaryVLAN struct {
	// The VLAN VID provided by the kernel.
	VLAN int
}
    48  
// Stats is a set of counters detailing the work TPacket has done so far.
// Both counters are updated and read with sync/atomic operations.
type Stats struct {
	// Packets is the total number of packets returned to the caller.
	Packets int64
	// Polls is the number of blocking syscalls made waiting for packets.
	// This should always be <= Packets, since with TPacket one syscall
	// can (and often does) return many results.
	Polls int64
}
    58  
// TpacketReqv2 is the ring-buffer request handed to the PACKET_RX_RING socket
// option when the socket runs TPacket version 1 or 2 (see setUpRing).
// NOTE(review): the field order presumably mirrors the kernel's
// struct tpacket_req; confirm before reordering.
type TpacketReqv2 struct {
	// blockSize is the size of one block, taken from the blockSize option.
	blockSize uint32
	// blockNr is the number of blocks in the ring.
	blockNr uint32
	// frameSize is the size of one frame.
	frameSize uint32
	// frameNr is the total number of frames (frames per block * blocks).
	frameNr uint32
}
    65  
// TpacketReqv3 is the ring-buffer request handed to the PACKET_RX_RING socket
// option when the socket runs TPacket version 3 (see setUpRing).
// NOTE(review): the field order presumably mirrors the kernel's
// struct tpacket_req3; confirm before reordering.
type TpacketReqv3 struct {
	// blockSize is the size of one block, taken from the blockSize option.
	blockSize uint32
	// blockNr is the number of blocks in the ring.
	blockNr uint32
	// frameSize is the size of one frame.
	frameSize uint32
	// frameNr is the total number of frames (frames per block * blocks).
	frameNr uint32
	// retireBlkTov is the block timeout, expressed in milliseconds.
	retireBlkTov uint32
	// sizeOfPriv is left at zero by setUpRing.
	sizeOfPriv uint32
	// featureReqWord is left at zero by setUpRing.
	featureReqWord uint32
}
    75  
    76  // SocketStats is a struct where socket stats are stored
    77  type SocketStats struct {
    78  	packets uint32
    79  	drops   uint32
    80  }
    81  
    82  // Packets returns the number of packets seen by this socket.
    83  func (s *SocketStats) Packets() uint {
    84  	return uint(s.packets)
    85  }
    86  
    87  // Drops returns the number of packets dropped on this socket.
    88  func (s *SocketStats) Drops() uint {
    89  	return uint(s.drops)
    90  }
    91  
    92  // SocketStatsV3 is a struct where socket stats for TPacketV3 are stored
    93  type SocketStatsV3 struct {
    94  	packets      uint32
    95  	drops        uint32
    96  	freezeQCount uint32
    97  }
    98  
    99  // Packets returns the number of packets seen by this socket.
   100  func (s *SocketStatsV3) Packets() uint {
   101  	return uint(s.packets)
   102  }
   103  
   104  // Drops returns the number of packets dropped on this socket.
   105  func (s *SocketStatsV3) Drops() uint {
   106  	return uint(s.drops)
   107  }
   108  
   109  // QueueFreezes returns the number of queue freezes on this socket.
   110  func (s *SocketStatsV3) QueueFreezes() uint {
   111  	return uint(s.freezeQCount)
   112  }
   113  
// TPacket implements packet receiving for Linux AF_PACKET versions 1, 2, and 3.
type TPacket struct {
	// stats is simple statistics on TPacket's run. This MUST be the first entry
	// to ensure alignment for the sync/atomic operations performed on it.
	stats Stats
	// fd is the C file descriptor.
	fd int
	// ring points to the memory space of the ring buffer shared by tpacket and the kernel.
	ring []byte
	// rawring is the unsafe pointer that we use to poll for packets; it points
	// at the first byte of ring.
	rawring unsafe.Pointer
	// opts contains read-only options for the TPacket object.
	opts options
	mu   sync.Mutex // guards below
	// offset is the offset into the ring of the current header.
	offset int
	// current is the current header.
	current header
	// shouldReleasePacket is set to true whenever we return packet data, to make
	// sure we remember to release that data back to the kernel.
	shouldReleasePacket bool
	// headerNextNeeded is set to true when the header needs to move on to the
	// next packet. It is cleared after a poll error, in which case the header
	// must not be advanced.
	headerNextNeeded bool
	// tpVersion is the version of TPacket actually in use, set by setRequestedTPacketVersion.
	tpVersion OptTPacketVersion
	// Hackity hack hack hack.  We need to return a pointer to the header with
	// getTPacketHeader, and we don't want to allocate a v3wrapper every time,
	// so we leave it in the TPacket object and return a pointer to it.
	v3 v3wrapper

	statsMu sync.Mutex // guards stats below
	// socketStats contains stats from the socket
	socketStats SocketStats
	// same as socketStats, but with an extra field freeze_q_cnt
	socketStatsV3 SocketStatsV3
}

// Compile-time check that *TPacket satisfies gopacket.ZeroCopyPacketDataSource.
var _ gopacket.ZeroCopyPacketDataSource = &TPacket{}
   150  
   151  // bindToInterface binds the TPacket socket to a particular named interface.
   152  func (h *TPacket) bindToInterface(ifaceName string) error {
   153  	ifIndex := 0
   154  	// An empty string here means to listen to all interfaces
   155  	if ifaceName != "" {
   156  		iface, err := net.InterfaceByName(ifaceName)
   157  		if err != nil {
   158  			return fmt.Errorf("InterfaceByName: %v", err)
   159  		}
   160  		ifIndex = iface.Index
   161  	}
   162  	s := &unix.SockaddrLinklayer{
   163  		Protocol: htons(uint16(unix.ETH_P_ALL)),
   164  		Ifindex:  ifIndex,
   165  	}
   166  	return unix.Bind(h.fd, s)
   167  }
   168  
   169  // setTPacketVersion asks the kernel to set TPacket to a particular version, and returns an error on failure.
   170  func (h *TPacket) setTPacketVersion(version OptTPacketVersion) error {
   171  	if err := unix.SetsockoptInt(h.fd, unix.SOL_PACKET, unix.PACKET_VERSION, int(version)); err != nil {
   172  		return fmt.Errorf("setsockopt packet_version: %v", err)
   173  	}
   174  	return nil
   175  }
   176  
   177  // setRequestedTPacketVersion tries to set TPacket to the requested version or versions.
   178  func (h *TPacket) setRequestedTPacketVersion() error {
   179  	switch {
   180  	case (h.opts.version == TPacketVersionHighestAvailable || h.opts.version == TPacketVersion3) && h.setTPacketVersion(TPacketVersion3) == nil:
   181  		h.tpVersion = TPacketVersion3
   182  	case (h.opts.version == TPacketVersionHighestAvailable || h.opts.version == TPacketVersion2) && h.setTPacketVersion(TPacketVersion2) == nil:
   183  		h.tpVersion = TPacketVersion2
   184  	case (h.opts.version == TPacketVersionHighestAvailable || h.opts.version == TPacketVersion1) && h.setTPacketVersion(TPacketVersion1) == nil:
   185  		h.tpVersion = TPacketVersion1
   186  	default:
   187  		return errors.New("no known tpacket versions work on this machine")
   188  	}
   189  	return nil
   190  }
   191  
   192  // setUpRing sets up the shared-memory ring buffer between the user process and the kernel.
   193  func (h *TPacket) setUpRing() (err error) {
   194  	totalSize := int(h.opts.framesPerBlock * h.opts.numBlocks * h.opts.frameSize)
   195  	switch h.tpVersion {
   196  	case TPacketVersion1, TPacketVersion2:
   197  		var tp TpacketReqv2
   198  		tp.blockSize = uint32(h.opts.blockSize)
   199  		tp.blockNr = uint32(h.opts.numBlocks)
   200  		tp.frameSize = uint32(h.opts.frameSize)
   201  		tp.frameNr = uint32(h.opts.framesPerBlock * h.opts.numBlocks)
   202  		if err := setsockopt(h.fd, unix.SOL_PACKET, unix.PACKET_RX_RING, unsafe.Pointer(&tp), unsafe.Sizeof(tp)); err != nil {
   203  			return fmt.Errorf("setsockopt packet_rx_ring: %v", err)
   204  		}
   205  	case TPacketVersion3:
   206  		var tp TpacketReqv3
   207  		tp.blockSize = uint32(h.opts.blockSize)
   208  		tp.blockNr = uint32(h.opts.numBlocks)
   209  		tp.frameSize = uint32(h.opts.frameSize)
   210  		tp.frameNr = uint32(h.opts.framesPerBlock * h.opts.numBlocks)
   211  		tp.retireBlkTov = uint32(h.opts.blockTimeout / time.Millisecond)
   212  		if err := setsockopt(h.fd, unix.SOL_PACKET, unix.PACKET_RX_RING, unsafe.Pointer(&tp), unsafe.Sizeof(tp)); err != nil {
   213  			return fmt.Errorf("setsockopt packet_rx_ring v3: %v", err)
   214  		}
   215  	default:
   216  		return errors.New("invalid tpVersion")
   217  	}
   218  	h.ring, err = unix.Mmap(h.fd, 0, totalSize, unix.PROT_READ|unix.PROT_WRITE, unix.MAP_SHARED)
   219  	if err != nil {
   220  		fmt.Println("here")
   221  		return err
   222  	}
   223  	if h.ring == nil {
   224  		return errors.New("no ring")
   225  	}
   226  	h.rawring = unsafe.Pointer(&h.ring[0])
   227  	return nil
   228  }
   229  
   230  // Close cleans up the TPacket.  It should not be used after the Close call.
   231  func (h *TPacket) Close() {
   232  	if h.fd == -1 {
   233  		return // already closed.
   234  	}
   235  	if h.ring != nil {
   236  		unix.Munmap(h.ring)
   237  	}
   238  	h.ring = nil
   239  	unix.Close(h.fd)
   240  	h.fd = -1
   241  	runtime.SetFinalizer(h, nil)
   242  }
   243  
   244  // NewTPacket returns a new TPacket object for reading packets off the wire.
   245  // Its behavior may be modified by passing in any/all of afpacket.Opt* to this
   246  // function.
   247  // If this function succeeds, the user should be sure to Close the returned
   248  // TPacket when finished with it.
   249  func NewTPacket(opts ...interface{}) (h *TPacket, err error) {
   250  	h = &TPacket{}
   251  	if h.opts, err = parseOptions(opts...); err != nil {
   252  		return nil, err
   253  	}
   254  	fd, err := unix.Socket(unix.AF_PACKET, int(h.opts.socktype), int(htons(unix.ETH_P_ALL)))
   255  	if err != nil {
   256  		return nil, err
   257  	}
   258  	h.fd = fd
   259  	if err = h.bindToInterface(h.opts.iface); err != nil {
   260  		goto errlbl
   261  	}
   262  	if err = h.setRequestedTPacketVersion(); err != nil {
   263  		goto errlbl
   264  	}
   265  	if err = h.setUpRing(); err != nil {
   266  		goto errlbl
   267  	}
   268  	// Clear stat counter from socket
   269  	if err = h.InitSocketStats(); err != nil {
   270  		goto errlbl
   271  	}
   272  	runtime.SetFinalizer(h, (*TPacket).Close)
   273  	return h, nil
   274  errlbl:
   275  	h.Close()
   276  	return nil, err
   277  }
   278  
   279  // SetBPF attaches a BPF filter to the underlying socket
   280  func (h *TPacket) SetBPF(filter []bpf.RawInstruction) error {
   281  	var p unix.SockFprog
   282  	if len(filter) > int(^uint16(0)) {
   283  		return errors.New("filter too large")
   284  	}
   285  	p.Len = uint16(len(filter))
   286  	p.Filter = (*unix.SockFilter)(unsafe.Pointer(&filter[0]))
   287  
   288  	return setsockopt(h.fd, unix.SOL_SOCKET, unix.SO_ATTACH_FILTER, unsafe.Pointer(&p), unix.SizeofSockFprog)
   289  }
   290  
   291  // attach ebpf filter to af-packet
   292  func (h *TPacket) SetEBPF(progFd int32) error {
   293  	return setsockopt(h.fd, unix.SOL_SOCKET, unix.SO_ATTACH_BPF, unsafe.Pointer(&progFd), 4)
   294  }
   295  
   296  func (h *TPacket) releaseCurrentPacket() error {
   297  	h.current.clearStatus()
   298  	h.offset++
   299  	h.shouldReleasePacket = false
   300  	return nil
   301  }
   302  
   303  // ZeroCopyReadPacketData reads the next packet off the wire, and returns its data.
   304  // The slice returned by ZeroCopyReadPacketData points to bytes owned by the
   305  // TPacket.  Each call to ZeroCopyReadPacketData invalidates any data previously
   306  // returned by ZeroCopyReadPacketData.  Care must be taken not to keep pointers
   307  // to old bytes when using ZeroCopyReadPacketData... if you need to keep data past
   308  // the next time you call ZeroCopyReadPacketData, use ReadPacketData, which copies
   309  // the bytes into a new buffer for you.
   310  //
   311  //	tp, _ := NewTPacket(...)
   312  //	data1, _, _ := tp.ZeroCopyReadPacketData()
   313  //	// do everything you want with data1 here, copying bytes out of it if you'd like to keep them around.
   314  //	data2, _, _ := tp.ZeroCopyReadPacketData()  // invalidates bytes in data1
   315  func (h *TPacket) ZeroCopyReadPacketData() (data []byte, ci gopacket.CaptureInfo, err error) {
   316  	h.mu.Lock()
   317  retry:
   318  	if h.current == nil || !h.headerNextNeeded || !h.current.next() {
   319  		if h.shouldReleasePacket {
   320  			h.releaseCurrentPacket()
   321  		}
   322  		h.current = h.getTPacketHeader()
   323  		if err = h.pollForFirstPacket(h.current); err != nil {
   324  			h.headerNextNeeded = false
   325  			h.mu.Unlock()
   326  			return
   327  		}
   328  		// We received an empty block
   329  		if h.current.getLength() == 0 {
   330  			goto retry
   331  		}
   332  	}
   333  	data = h.current.getData(&h.opts)
   334  	ci.Timestamp = h.current.getTime()
   335  	ci.CaptureLength = len(data)
   336  	ci.Length = h.current.getLength()
   337  	ci.InterfaceIndex = h.current.getIfaceIndex()
   338  	vlan := h.current.getVLAN()
   339  	if vlan >= 0 {
   340  		ci.AncillaryData = append(ci.AncillaryData, AncillaryVLAN{vlan})
   341  	}
   342  	atomic.AddInt64(&h.stats.Packets, 1)
   343  	h.headerNextNeeded = true
   344  	h.mu.Unlock()
   345  
   346  	return
   347  }
   348  
   349  // Stats returns statistics on the packets the TPacket has seen so far.
   350  func (h *TPacket) Stats() (Stats, error) {
   351  	return Stats{
   352  		Polls:   atomic.LoadInt64(&h.stats.Polls),
   353  		Packets: atomic.LoadInt64(&h.stats.Packets),
   354  	}, nil
   355  }
   356  
   357  // InitSocketStats clears socket counters and return empty stats.
   358  func (h *TPacket) InitSocketStats() error {
   359  	if h.tpVersion == TPacketVersion3 {
   360  		var ssv3 SocketStatsV3
   361  		slt := uint32(unsafe.Sizeof(ssv3))
   362  
   363  		err := getsockopt(h.fd, unix.SOL_PACKET, unix.PACKET_STATISTICS, unsafe.Pointer(&ssv3), uintptr(unsafe.Pointer(&slt)))
   364  		if err != nil {
   365  			return err
   366  		}
   367  		h.socketStatsV3 = SocketStatsV3{}
   368  	} else {
   369  		var ss SocketStats
   370  		slt := uint32(unsafe.Sizeof(ss))
   371  
   372  		err := getsockopt(h.fd, unix.SOL_PACKET, unix.PACKET_STATISTICS, unsafe.Pointer(&ss), uintptr(unsafe.Pointer(&slt)))
   373  		if err != nil {
   374  			return err
   375  		}
   376  		h.socketStats = SocketStats{}
   377  	}
   378  	return nil
   379  }
   380  
   381  // SocketStats saves stats from the socket to the TPacket instance.
   382  func (h *TPacket) SocketStats() (SocketStats, SocketStatsV3, error) {
   383  	h.statsMu.Lock()
   384  	defer h.statsMu.Unlock()
   385  	// We need to save the counters since asking for the stats will clear them
   386  	if h.tpVersion == TPacketVersion3 {
   387  		var ssv3 SocketStatsV3
   388  		slt := uint32(unsafe.Sizeof(ssv3))
   389  
   390  		err := getsockopt(h.fd, unix.SOL_PACKET, unix.PACKET_STATISTICS, unsafe.Pointer(&ssv3), uintptr(unsafe.Pointer(&slt)))
   391  		if err != nil {
   392  			return SocketStats{}, SocketStatsV3{}, err
   393  		}
   394  
   395  		h.socketStatsV3.packets += ssv3.packets
   396  		h.socketStatsV3.drops += ssv3.drops
   397  		h.socketStatsV3.freezeQCount += ssv3.freezeQCount
   398  		return h.socketStats, h.socketStatsV3, nil
   399  	}
   400  	var ss SocketStats
   401  	slt := uint32(unsafe.Sizeof(ss))
   402  
   403  	err := getsockopt(h.fd, unix.SOL_PACKET, unix.PACKET_STATISTICS, unsafe.Pointer(&ss), uintptr(unsafe.Pointer(&slt)))
   404  	if err != nil {
   405  		return SocketStats{}, SocketStatsV3{}, err
   406  	}
   407  
   408  	h.socketStats.packets += ss.packets
   409  	h.socketStats.drops += ss.drops
   410  	return h.socketStats, h.socketStatsV3, nil
   411  }
   412  
   413  // ReadPacketDataTo reads packet data into a user-supplied buffer.
   414  // This function reads up to the length of the passed-in slice.
   415  // The number of bytes read into data will be returned in ci.CaptureLength,
   416  // which is the minimum of the size of the passed-in buffer and the size of
   417  // the captured packet.
   418  func (h *TPacket) ReadPacketDataTo(data []byte) (ci gopacket.CaptureInfo, err error) {
   419  	var d []byte
   420  	d, ci, err = h.ZeroCopyReadPacketData()
   421  	if err != nil {
   422  		return
   423  	}
   424  	ci.CaptureLength = copy(data, d)
   425  	return
   426  }
   427  
   428  // ReadPacketData reads the next packet, copies it into a new buffer, and returns
   429  // that buffer.  Since the buffer is allocated by ReadPacketData, it is safe for long-term
   430  // use.  This implements gopacket.PacketDataSource.
   431  func (h *TPacket) ReadPacketData() (data []byte, ci gopacket.CaptureInfo, err error) {
   432  	var d []byte
   433  	d, ci, err = h.ZeroCopyReadPacketData()
   434  	if err != nil {
   435  		return
   436  	}
   437  	data = make([]byte, len(d))
   438  	copy(data, d)
   439  	return
   440  }
   441  
   442  func (h *TPacket) getTPacketHeader() header {
   443  	switch h.tpVersion {
   444  	case TPacketVersion1:
   445  		if h.offset >= h.opts.framesPerBlock*h.opts.numBlocks {
   446  			h.offset = 0
   447  		}
   448  		position := uintptr(h.rawring) + uintptr(h.opts.frameSize*h.offset)
   449  		return (*v1header)(unsafe.Pointer(position))
   450  	case TPacketVersion2:
   451  		if h.offset >= h.opts.framesPerBlock*h.opts.numBlocks {
   452  			h.offset = 0
   453  		}
   454  		position := uintptr(h.rawring) + uintptr(h.opts.frameSize*h.offset)
   455  		return (*v2header)(unsafe.Pointer(position))
   456  	case TPacketVersion3:
   457  		// TPacket3 uses each block to return values, instead of each frame.  Hence we need to rotate when we hit #blocks, not #frames.
   458  		if h.offset >= h.opts.numBlocks {
   459  			h.offset = 0
   460  		}
   461  		position := uintptr(h.rawring) + uintptr(h.opts.frameSize*h.offset*h.opts.framesPerBlock)
   462  		h.v3 = initV3Wrapper(unsafe.Pointer(position))
   463  		return &h.v3
   464  	}
   465  	panic("handle tpacket version is invalid")
   466  }
   467  
   468  func (h *TPacket) pollForFirstPacket(hdr header) error {
   469  	tm := int(h.opts.pollTimeout / time.Millisecond)
   470  	for hdr.getStatus()&unix.TP_STATUS_USER == 0 {
   471  		pollset := [1]unix.PollFd{
   472  			{
   473  				Fd:     int32(h.fd),
   474  				Events: unix.POLLIN,
   475  			},
   476  		}
   477  		n, err := unix.Poll(pollset[:], tm)
   478  		if n == 0 {
   479  			return ErrTimeout
   480  		}
   481  
   482  		atomic.AddInt64(&h.stats.Polls, 1)
   483  		if pollset[0].Revents&unix.POLLERR > 0 {
   484  			return ErrPoll
   485  		}
   486  		if err == syscall.EINTR {
   487  			continue
   488  		}
   489  		if err != nil {
   490  			return err
   491  		}
   492  	}
   493  
   494  	h.shouldReleasePacket = true
   495  	return nil
   496  }
   497  
// FanoutType determines the type of fanout to use with a TPacket SetFanout call.
type FanoutType int

// FanoutType values. Each one is taken directly from the corresponding
// unix.PACKET_FANOUT_* constant.
const (
	FanoutHash FanoutType = unix.PACKET_FANOUT_HASH
	// It appears that defrag only works with FanoutHash, see:
	// http://lxr.free-electrons.com/source/net/packet/af_packet.c#L1204
	// Note that this value is the defrag flag constant on its own.
	FanoutHashWithDefrag FanoutType = unix.PACKET_FANOUT_FLAG_DEFRAG
	FanoutLoadBalance    FanoutType = unix.PACKET_FANOUT_LB
	FanoutCPU            FanoutType = unix.PACKET_FANOUT_CPU
	FanoutRollover       FanoutType = unix.PACKET_FANOUT_ROLLOVER
	FanoutRandom         FanoutType = unix.PACKET_FANOUT_RND
	FanoutQueueMapping   FanoutType = unix.PACKET_FANOUT_QM
	FanoutCBPF           FanoutType = unix.PACKET_FANOUT_CBPF
	FanoutEBPF           FanoutType = unix.PACKET_FANOUT_EBPF
)
   515  
   516  // SetFanout activates TPacket's fanout ability.
   517  // Use of Fanout requires creating multiple TPacket objects and the same id/type to
   518  // a SetFanout call on each.  Note that this can be done cross-process, so if two
   519  // different processes both call SetFanout with the same type/id, they'll share
   520  // packets between them.  The same should work for multiple TPacket objects within
   521  // the same process.
   522  func (h *TPacket) SetFanout(t FanoutType, id uint16) error {
   523  	h.mu.Lock()
   524  	defer h.mu.Unlock()
   525  	arg := int(t) << 16
   526  	arg |= int(id)
   527  	return setsockopt(h.fd, unix.SOL_PACKET, unix.PACKET_FANOUT, unsafe.Pointer(&arg), unsafe.Sizeof(arg))
   528  }
   529  
   530  // WritePacketData transmits a raw packet.
   531  func (h *TPacket) WritePacketData(pkt []byte) error {
   532  	_, err := unix.Write(h.fd, pkt)
   533  	return err
   534  }