github.com/vishvananda/netlink@v1.3.1/conntrack_linux.go (about)

     1  package netlink
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/binary"
     6  	"errors"
     7  	"fmt"
     8  	"io/fs"
     9  	"net"
    10  	"time"
    11  
    12  	"github.com/vishvananda/netlink/nl"
    13  	"golang.org/x/sys/unix"
    14  )
    15  
// ConntrackTableType selects which conntrack table a netlink operation targets.
// Its value is shifted into the high byte of the netlink message type when a
// conntrack request is built.
type ConntrackTableType uint8
    18  
const (
	// ConntrackTable selects the conntrack table.
	// https://github.com/torvalds/linux/blob/master/include/uapi/linux/netfilter/nfnetlink.h -> #define NFNL_SUBSYS_CTNETLINK		 1
	ConntrackTable = 1
	// ConntrackExpectTable selects the conntrack expectation table.
	// https://github.com/torvalds/linux/blob/master/include/uapi/linux/netfilter/nfnetlink.h -> #define NFNL_SUBSYS_CTNETLINK_EXP 2
	ConntrackExpectTable = 2
)
    27  
const (
	// seekCurrent is the whence value for seeking relative to the current
	// offset (same value as io.SeekCurrent). It is defined locally for
	// backward compatibility with golang 1.6 which does not have io.SeekCurrent.
	seekCurrent = 1
)
    32  
// InetFamily is the address family of a conntrack request. It is written into
// the NfgenFamily field of the netfilter header of every request.
type InetFamily uint8
    35  
    36  //  -L [table] [options]          List conntrack or expectation table
    37  //  -G [table] parameters         Get conntrack or expectation
    38  
    39  //  -I [table] parameters         Create a conntrack or expectation
    40  //  -U [table] parameters         Update a conntrack
    41  //  -E [table] [options]          Show events
    42  
    43  //  -C [table]                    Show counter
    44  //  -S                            Show statistics
    45  
    46  // ConntrackTableList returns the flow list of a table of a specific family
    47  // conntrack -L [table] [options]          List conntrack or expectation table
    48  //
    49  // If the returned error is [ErrDumpInterrupted], results may be inconsistent
    50  // or incomplete.
    51  func ConntrackTableList(table ConntrackTableType, family InetFamily) ([]*ConntrackFlow, error) {
    52  	return pkgHandle.ConntrackTableList(table, family)
    53  }
    54  
    55  // ConntrackTableFlush flushes all the flows of a specified table
    56  // conntrack -F [table]            Flush table
    57  // The flush operation applies to all the family types
    58  func ConntrackTableFlush(table ConntrackTableType) error {
    59  	return pkgHandle.ConntrackTableFlush(table)
    60  }
    61  
    62  // ConntrackCreate creates a new conntrack flow in the desired table
    63  // conntrack -I [table]		Create a conntrack or expectation
    64  func ConntrackCreate(table ConntrackTableType, family InetFamily, flow *ConntrackFlow) error {
    65  	return pkgHandle.ConntrackCreate(table, family, flow)
    66  }
    67  
    68  // ConntrackUpdate updates an existing conntrack flow in the desired table using the handle
    69  // conntrack -U [table]		Update a conntrack
    70  func ConntrackUpdate(table ConntrackTableType, family InetFamily, flow *ConntrackFlow) error {
    71  	return pkgHandle.ConntrackUpdate(table, family, flow)
    72  }
    73  
    74  // ConntrackDeleteFilter deletes entries on the specified table on the base of the filter
    75  // conntrack -D [table] parameters         Delete conntrack or expectation
    76  //
    77  // Deprecated: use [ConntrackDeleteFilters] instead.
    78  func ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter CustomConntrackFilter) (uint, error) {
    79  	return pkgHandle.ConntrackDeleteFilters(table, family, filter)
    80  }
    81  
    82  // ConntrackDeleteFilters deletes entries on the specified table matching any of the specified filters
    83  // conntrack -D [table] parameters         Delete conntrack or expectation
    84  func ConntrackDeleteFilters(table ConntrackTableType, family InetFamily, filters ...CustomConntrackFilter) (uint, error) {
    85  	return pkgHandle.ConntrackDeleteFilters(table, family, filters...)
    86  }
    87  
    88  // ConntrackTableList returns the flow list of a table of a specific family using the netlink handle passed
    89  // conntrack -L [table] [options]          List conntrack or expectation table
    90  //
    91  // If the returned error is [ErrDumpInterrupted], results may be inconsistent
    92  // or incomplete.
    93  func (h *Handle) ConntrackTableList(table ConntrackTableType, family InetFamily) ([]*ConntrackFlow, error) {
    94  	res, executeErr := h.dumpConntrackTable(table, family)
    95  	if executeErr != nil && !errors.Is(executeErr, ErrDumpInterrupted) {
    96  		return nil, executeErr
    97  	}
    98  
    99  	// Deserialize all the flows
   100  	var result []*ConntrackFlow
   101  	for _, dataRaw := range res {
   102  		result = append(result, parseRawData(dataRaw))
   103  	}
   104  
   105  	return result, executeErr
   106  }
   107  
   108  // ConntrackTableFlush flushes all the flows of a specified table using the netlink handle passed
   109  // conntrack -F [table]            Flush table
   110  // The flush operation applies to all the family types
   111  func (h *Handle) ConntrackTableFlush(table ConntrackTableType) error {
   112  	req := h.newConntrackRequest(table, unix.AF_INET, nl.IPCTNL_MSG_CT_DELETE, unix.NLM_F_ACK)
   113  	_, err := req.Execute(unix.NETLINK_NETFILTER, 0)
   114  	return err
   115  }
   116  
   117  // ConntrackCreate creates a new conntrack flow in the desired table using the handle
   118  // conntrack -I [table]		Create a conntrack or expectation
   119  func (h *Handle) ConntrackCreate(table ConntrackTableType, family InetFamily, flow *ConntrackFlow) error {
   120  	req := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_NEW, unix.NLM_F_ACK|unix.NLM_F_CREATE)
   121  	attr, err := flow.toNlData()
   122  	if err != nil {
   123  		return err
   124  	}
   125  
   126  	for _, a := range attr {
   127  		req.AddData(a)
   128  	}
   129  
   130  	_, err = req.Execute(unix.NETLINK_NETFILTER, 0)
   131  	return err
   132  }
   133  
   134  // ConntrackUpdate updates an existing conntrack flow in the desired table using the handle
   135  // conntrack -U [table]		Update a conntrack
   136  func (h *Handle) ConntrackUpdate(table ConntrackTableType, family InetFamily, flow *ConntrackFlow) error {
   137  	req := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_NEW, unix.NLM_F_ACK|unix.NLM_F_REPLACE)
   138  	attr, err := flow.toNlData()
   139  	if err != nil {
   140  		return err
   141  	}
   142  
   143  	for _, a := range attr {
   144  		req.AddData(a)
   145  	}
   146  
   147  	_, err = req.Execute(unix.NETLINK_NETFILTER, 0)
   148  	return err
   149  }
   150  
   151  // ConntrackDeleteFilter deletes entries on the specified table on the base of the filter using the netlink handle passed
   152  // conntrack -D [table] parameters         Delete conntrack or expectation
   153  //
   154  // Deprecated: use [Handle.ConntrackDeleteFilters] instead.
   155  func (h *Handle) ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter CustomConntrackFilter) (uint, error) {
   156  	return h.ConntrackDeleteFilters(table, family, filter)
   157  }
   158  
   159  // ConntrackDeleteFilters deletes entries on the specified table matching any of the specified filters using the netlink handle passed
   160  // conntrack -D [table] parameters         Delete conntrack or expectation
   161  func (h *Handle) ConntrackDeleteFilters(table ConntrackTableType, family InetFamily, filters ...CustomConntrackFilter) (uint, error) {
   162  	var finalErr error
   163  	res, err := h.dumpConntrackTable(table, family)
   164  	if err != nil {
   165  		if !errors.Is(err, ErrDumpInterrupted) {
   166  			return 0, err
   167  		}
   168  		// This allows us to at least do a best effort to try to clean the
   169  		// entries matching the filter.
   170  		finalErr = err
   171  	}
   172  
   173  	var totalFilterErrors int
   174  	var matched uint
   175  	for _, dataRaw := range res {
   176  		flow := parseRawData(dataRaw)
   177  		for _, filter := range filters {
   178  			if match := filter.MatchConntrackFlow(flow); match {
   179  				req2 := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_DELETE, unix.NLM_F_ACK)
   180  				// skip the first 4 byte that are the netfilter header, the newConntrackRequest is adding it already
   181  				req2.AddRawData(dataRaw[4:])
   182  				if _, err = req2.Execute(unix.NETLINK_NETFILTER, 0); err == nil || errors.Is(err, fs.ErrNotExist) {
   183  					matched++
   184  					// flow is already deleted, no need to match on other filters and continue to the next flow.
   185  					break
   186  				} else {
   187  					totalFilterErrors++
   188  				}
   189  			}
   190  		}
   191  	}
   192  	if totalFilterErrors > 0 {
   193  		finalErr = errors.Join(finalErr, fmt.Errorf("failed to delete %d conntrack flows with %d filters", totalFilterErrors, len(filters)))
   194  	}
   195  	return matched, finalErr
   196  }
   197  
   198  func (h *Handle) newConntrackRequest(table ConntrackTableType, family InetFamily, operation, flags int) *nl.NetlinkRequest {
   199  	// Create the Netlink request object
   200  	req := h.newNetlinkRequest((int(table)<<8)|operation, flags)
   201  	// Add the netfilter header
   202  	msg := &nl.Nfgenmsg{
   203  		NfgenFamily: uint8(family),
   204  		Version:     nl.NFNETLINK_V0,
   205  		ResId:       0,
   206  	}
   207  	req.AddData(msg)
   208  	return req
   209  }
   210  
   211  func (h *Handle) dumpConntrackTable(table ConntrackTableType, family InetFamily) ([][]byte, error) {
   212  	req := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_GET, unix.NLM_F_DUMP)
   213  	return req.Execute(unix.NETLINK_NETFILTER, 0)
   214  }
   215  
// ProtoInfo wraps an L4-protocol structure - roughly corresponds to the
// __nfct_protoinfo union found in libnetfilter_conntrack/include/internal/object.h.
// Currently, only protocol names, and TCP state is supported.
type ProtoInfo interface {
	// Protocol returns the lowercase name of the L4 protocol the value
	// describes (for example "tcp").
	Protocol() string
}
   222  
   223  // ProtoInfoTCP corresponds to the `tcp` struct of the __nfct_protoinfo union.
   224  // Only TCP state is currently supported.
   225  type ProtoInfoTCP struct {
   226  	State uint8
   227  }
   228  // Protocol returns "tcp".
   229  func (*ProtoInfoTCP) Protocol() string {return "tcp"}
   230  func (p *ProtoInfoTCP) toNlData() ([]*nl.RtAttr, error) {
   231  	ctProtoInfo := nl.NewRtAttr(unix.NLA_F_NESTED | nl.CTA_PROTOINFO, []byte{})
   232  	ctProtoInfoTCP := nl.NewRtAttr(unix.NLA_F_NESTED|nl.CTA_PROTOINFO_TCP, []byte{})
   233  	ctProtoInfoTCPState := nl.NewRtAttr(nl.CTA_PROTOINFO_TCP_STATE, nl.Uint8Attr(p.State))
   234  	ctProtoInfoTCP.AddChild(ctProtoInfoTCPState)
   235  	ctProtoInfo.AddChild(ctProtoInfoTCP)
   236  
   237  	return []*nl.RtAttr{ctProtoInfo}, nil
   238  }
   239  
   240  // ProtoInfoSCTP only supports the protocol name.
   241  type ProtoInfoSCTP struct {}
   242  // Protocol returns "sctp".
   243  func (*ProtoInfoSCTP) Protocol() string {return "sctp"}
   244  
   245  // ProtoInfoDCCP only supports the protocol name.
   246  type ProtoInfoDCCP struct {}
   247  // Protocol returns "dccp".
   248  func (*ProtoInfoDCCP) Protocol() string {return "dccp"}
   249  
// The full conntrack flow structure is very complicated and can be found in the file:
// http://git.netfilter.org/libnetfilter_conntrack/tree/include/internal/object.h
// For the time being, the structure below allows to parse and extract the base information of a flow
//
// IPTuple describes one direction of a flow: its addresses, L4 protocol and
// ports, plus the byte and packet counters of that direction.
type IPTuple struct {
	Bytes    uint64 // byte counter, filled from CTA_COUNTERS_BYTES when parsing
	DstIP    net.IP // destination address
	DstPort  uint16 // destination port; only parsed for TCP and UDP
	Packets  uint64 // packet counter, filled from CTA_COUNTERS_PACKETS when parsing
	Protocol uint8  // L4 protocol number (CTA_PROTO_NUM)
	SrcIP    net.IP // source address
	SrcPort  uint16 // source port; only parsed for TCP and UDP
}
   262  
   263  // toNlData generates the inner fields of a nested tuple netlink datastructure
   264  // does not generate the "nested"-flagged outer message.
   265  func (t *IPTuple) toNlData(family uint8) ([]*nl.RtAttr, error) {
   266  
   267  	var srcIPsFlag, dstIPsFlag int
   268  	if family == nl.FAMILY_V4 {
   269  		srcIPsFlag = nl.CTA_IP_V4_SRC
   270  		dstIPsFlag = nl.CTA_IP_V4_DST
   271  	} else if family == nl.FAMILY_V6 {
   272  		srcIPsFlag = nl.CTA_IP_V6_SRC
   273  		dstIPsFlag = nl.CTA_IP_V6_DST
   274  	} else {
   275  		return []*nl.RtAttr{}, fmt.Errorf("couldn't generate netlink message for tuple due to unrecognized FamilyType '%d'", family)
   276  	}
   277  
   278  	ctTupleIP := nl.NewRtAttr(unix.NLA_F_NESTED|nl.CTA_TUPLE_IP, nil)
   279  	ctTupleIPSrc := nl.NewRtAttr(srcIPsFlag, t.SrcIP)
   280  	ctTupleIP.AddChild(ctTupleIPSrc)
   281  	ctTupleIPDst := nl.NewRtAttr(dstIPsFlag, t.DstIP)
   282  	ctTupleIP.AddChild(ctTupleIPDst)
   283  
   284  	ctTupleProto := nl.NewRtAttr(unix.NLA_F_NESTED|nl.CTA_TUPLE_PROTO, nil)
   285  	ctTupleProtoNum := nl.NewRtAttr(nl.CTA_PROTO_NUM, []byte{t.Protocol})
   286  	ctTupleProto.AddChild(ctTupleProtoNum)
   287  	ctTupleProtoSrcPort := nl.NewRtAttr(nl.CTA_PROTO_SRC_PORT, nl.BEUint16Attr(t.SrcPort))
   288  	ctTupleProto.AddChild(ctTupleProtoSrcPort)
   289  	ctTupleProtoDstPort := nl.NewRtAttr(nl.CTA_PROTO_DST_PORT, nl.BEUint16Attr(t.DstPort))
   290  	ctTupleProto.AddChild(ctTupleProtoDstPort, )
   291  
   292  	return []*nl.RtAttr{ctTupleIP, ctTupleProto}, nil
   293  }
   294  
// ConntrackFlow holds the base information of one conntrack entry, as parsed
// from a raw netlink message or as supplied when creating/updating an entry.
type ConntrackFlow struct {
	FamilyType uint8     // address family; first byte of the netfilter header when parsed
	Forward    IPTuple   // original direction (CTA_TUPLE_ORIG / CTA_COUNTERS_ORIG)
	Reverse    IPTuple   // reply direction (CTA_TUPLE_REPLY / CTA_COUNTERS_REPLY)
	Mark       uint32    // connection mark (CTA_MARK)
	Zone       uint16    // conntrack zone (CTA_ZONE)
	TimeStart  uint64    // CTA_TIMESTAMP_START; String interprets it as nanoseconds since the Unix epoch
	TimeStop   uint64    // CTA_TIMESTAMP_STOP; String interprets it as nanoseconds since the Unix epoch
	TimeOut    uint32    // CTA_TIMEOUT; String prints it as seconds
	Labels     []byte    // connection labels (CTA_LABELS); 16 bytes when parsed
	ProtoInfo  ProtoInfo // L4 protocol details (CTA_PROTOINFO); nil when not present
}
   307  
   308  func (s *ConntrackFlow) String() string {
   309  	// conntrack cmd output:
   310  	// udp      17 src=127.0.0.1 dst=127.0.0.1 sport=4001 dport=1234 packets=5 bytes=532 [UNREPLIED] src=127.0.0.1 dst=127.0.0.1 sport=1234 dport=4001 packets=10 bytes=1078 mark=0 labels=0x00000000050012ac4202010000000000 zone=100
   311  	//             start=2019-07-26 01:26:21.557800506 +0000 UTC stop=1970-01-01 00:00:00 +0000 UTC timeout=30(sec)
   312  	start := time.Unix(0, int64(s.TimeStart))
   313  	stop := time.Unix(0, int64(s.TimeStop))
   314  	timeout := int32(s.TimeOut)
   315  	res := fmt.Sprintf("%s\t%d src=%s dst=%s sport=%d dport=%d packets=%d bytes=%d\tsrc=%s dst=%s sport=%d dport=%d packets=%d bytes=%d mark=0x%x ",
   316  		nl.L4ProtoMap[s.Forward.Protocol], s.Forward.Protocol,
   317  		s.Forward.SrcIP.String(), s.Forward.DstIP.String(), s.Forward.SrcPort, s.Forward.DstPort, s.Forward.Packets, s.Forward.Bytes,
   318  		s.Reverse.SrcIP.String(), s.Reverse.DstIP.String(), s.Reverse.SrcPort, s.Reverse.DstPort, s.Reverse.Packets, s.Reverse.Bytes,
   319  		s.Mark)
   320  	if len(s.Labels) > 0 {
   321  		res += fmt.Sprintf("labels=0x%x ", s.Labels)
   322  	}
   323  	if s.Zone != 0 {
   324  		res += fmt.Sprintf("zone=%d ", s.Zone)
   325  	}
   326  	res += fmt.Sprintf("start=%v stop=%v timeout=%d(sec)", start, stop, timeout)
   327  	return res
   328  }
   329  
   330  // toNlData generates netlink messages representing the flow.
   331  func (s *ConntrackFlow) toNlData() ([]*nl.RtAttr, error) {
   332  	var payload []*nl.RtAttr
   333  	// The message structure is built as follows:
   334  	//	<len, NLA_F_NESTED|CTA_TUPLE_ORIG>
   335  	//		<len, NLA_F_NESTED|CTA_TUPLE_IP>
   336  	//			<len, [CTA_IP_V4_SRC|CTA_IP_V6_SRC]>
   337  	//			<IP>
   338  	//			<len, [CTA_IP_V4_DST|CTA_IP_V6_DST]>
   339  	//			<IP>
   340  	//		<len, NLA_F_NESTED|nl.CTA_TUPLE_PROTO>
   341  	//			<len, CTA_PROTO_NUM>
   342  	//			<uint8>
   343  	//			<len, CTA_PROTO_SRC_PORT>
   344  	//			<BEuint16>
   345  	//			<len, CTA_PROTO_DST_PORT>
   346  	//			<BEuint16>
   347  	// 	<len, NLA_F_NESTED|CTA_TUPLE_REPLY>
   348  	//		<len, NLA_F_NESTED|CTA_TUPLE_IP>
   349  	//			<len, [CTA_IP_V4_SRC|CTA_IP_V6_SRC]>
   350  	//			<IP>
   351  	//			<len, [CTA_IP_V4_DST|CTA_IP_V6_DST]>
   352  	//			<IP>
   353  	//		<len, NLA_F_NESTED|nl.CTA_TUPLE_PROTO>
   354  	//			<len, CTA_PROTO_NUM>
   355  	//			<uint8>
   356  	//			<len, CTA_PROTO_SRC_PORT>
   357  	//			<BEuint16>
   358  	//			<len, CTA_PROTO_DST_PORT>
   359  	//			<BEuint16>
   360  	//	<len, CTA_STATUS>
   361  	//	<uint64>
   362  	//	<len, CTA_MARK>
   363  	//	<BEuint64>
   364  	//	<len, CTA_TIMEOUT>
   365  	//	<BEuint64>
   366  	//	<len, NLA_F_NESTED|CTA_PROTOINFO>
   367   
   368  	// CTA_TUPLE_ORIG
   369  	ctTupleOrig := nl.NewRtAttr(unix.NLA_F_NESTED|nl.CTA_TUPLE_ORIG, nil)
   370  	forwardFlowAttrs, err := s.Forward.toNlData(s.FamilyType)
   371  	if err != nil {
   372  		return nil, fmt.Errorf("couldn't generate netlink data for conntrack forward flow: %w", err)
   373  	}
   374  	for _, a := range forwardFlowAttrs {
   375  		ctTupleOrig.AddChild(a)
   376  	}
   377  
   378  	// CTA_TUPLE_REPLY
   379  	ctTupleReply := nl.NewRtAttr(unix.NLA_F_NESTED|nl.CTA_TUPLE_REPLY, nil)
   380  	reverseFlowAttrs, err := s.Reverse.toNlData(s.FamilyType)
   381  	if err != nil {
   382  		return nil, fmt.Errorf("couldn't generate netlink data for conntrack reverse flow: %w", err)
   383  	}
   384  	for _, a := range reverseFlowAttrs {
   385  		ctTupleReply.AddChild(a)
   386  	}
   387  
   388  	ctMark := nl.NewRtAttr(nl.CTA_MARK, nl.BEUint32Attr(s.Mark))
   389  	ctTimeout := nl.NewRtAttr(nl.CTA_TIMEOUT, nl.BEUint32Attr(s.TimeOut))
   390  
   391  	payload = append(payload, ctTupleOrig, ctTupleReply, ctMark, ctTimeout)
   392  
   393  	if s.ProtoInfo != nil {
   394  		switch p := s.ProtoInfo.(type) {
   395  		case *ProtoInfoTCP:
   396  			attrs, err := p.toNlData()
   397  			if err != nil {
   398  				return nil, fmt.Errorf("couldn't generate netlink data for conntrack flow's TCP protoinfo: %w", err)
   399  			}
   400  			payload = append(payload, attrs...)
   401  		default:
   402  			return nil, errors.New("couldn't generate netlink data for conntrack: field 'ProtoInfo' only supports TCP or nil")
   403  		}
   404  	}
   405  
   406  	return payload, nil
   407  }
   408  
   409  // This method parse the ip tuple structure
   410  // The message structure is the following:
   411  // <len, [CTA_IP_V4_SRC|CTA_IP_V6_SRC], 16 bytes for the IP>
   412  // <len, [CTA_IP_V4_DST|CTA_IP_V6_DST], 16 bytes for the IP>
   413  // <len, NLA_F_NESTED|nl.CTA_TUPLE_PROTO, 1 byte for the protocol, 3 bytes of padding>
   414  // <len, CTA_PROTO_SRC_PORT, 2 bytes for the source port, 2 bytes of padding>
   415  // <len, CTA_PROTO_DST_PORT, 2 bytes for the source port, 2 bytes of padding>
   416  func parseIpTuple(reader *bytes.Reader, tpl *IPTuple) uint8 {
   417  	for i := 0; i < 2; i++ {
   418  		_, t, _, v := parseNfAttrTLV(reader)
   419  		switch t {
   420  		case nl.CTA_IP_V4_SRC, nl.CTA_IP_V6_SRC:
   421  			tpl.SrcIP = v
   422  		case nl.CTA_IP_V4_DST, nl.CTA_IP_V6_DST:
   423  			tpl.DstIP = v
   424  		}
   425  	}
   426  	// Get total length of nested protocol-specific info.
   427  	_, _, protoInfoTotalLen := parseNfAttrTL(reader)
   428  	_, t, l, v := parseNfAttrTLV(reader)
   429  	// Track the number of bytes read.
   430  	protoInfoBytesRead := uint16(nl.SizeofNfattr) + l
   431  	if t == nl.CTA_PROTO_NUM {
   432  		tpl.Protocol = uint8(v[0])
   433  	}
   434  	// We only parse TCP & UDP headers. Skip the others.
   435  	if tpl.Protocol != unix.IPPROTO_TCP && tpl.Protocol != unix.IPPROTO_UDP {
   436  		// skip the rest
   437  		bytesRemaining := protoInfoTotalLen - protoInfoBytesRead
   438  		reader.Seek(int64(bytesRemaining), seekCurrent)
   439  		return tpl.Protocol
   440  	}
   441  	// Skip 3 bytes of padding
   442  	reader.Seek(3, seekCurrent)
   443  	protoInfoBytesRead += 3
   444  	for i := 0; i < 2; i++ {
   445  		_, t, _ := parseNfAttrTL(reader)
   446  		protoInfoBytesRead += uint16(nl.SizeofNfattr)
   447  		switch t {
   448  		case nl.CTA_PROTO_SRC_PORT:
   449  			parseBERaw16(reader, &tpl.SrcPort)
   450  			protoInfoBytesRead += 2
   451  		case nl.CTA_PROTO_DST_PORT:
   452  			parseBERaw16(reader, &tpl.DstPort)
   453  			protoInfoBytesRead += 2
   454  		}
   455  		// Skip 2 bytes of padding
   456  		reader.Seek(2, seekCurrent)
   457  		protoInfoBytesRead += 2
   458  	}
   459  	// Skip any remaining/unknown parts of the message
   460  	bytesRemaining := protoInfoTotalLen - protoInfoBytesRead
   461  	reader.Seek(int64(bytesRemaining), seekCurrent)
   462  
   463  	return tpl.Protocol
   464  }
   465  
   466  func parseNfAttrTLV(r *bytes.Reader) (isNested bool, attrType, len uint16, value []byte) {
   467  	isNested, attrType, len = parseNfAttrTL(r)
   468  
   469  	value = make([]byte, len)
   470  	binary.Read(r, binary.BigEndian, &value)
   471  	return isNested, attrType, len, value
   472  }
   473  
   474  func parseNfAttrTL(r *bytes.Reader) (isNested bool, attrType, len uint16) {
   475  	binary.Read(r, nl.NativeEndian(), &len)
   476  	len -= nl.SizeofNfattr
   477  
   478  	binary.Read(r, nl.NativeEndian(), &attrType)
   479  	isNested = (attrType & nl.NLA_F_NESTED) == nl.NLA_F_NESTED
   480  	attrType = attrType & (nl.NLA_F_NESTED - 1)
   481  	return isNested, attrType, len
   482  }
   483  
   484  // skipNfAttrValue seeks `r` past attr of length `len`.
   485  // Maintains buffer alignment.
   486  // Returns length of the seek performed.
   487  func skipNfAttrValue(r *bytes.Reader, len uint16) uint16 {
   488  	len = (len + nl.NLA_ALIGNTO - 1) & ^(nl.NLA_ALIGNTO - 1)
   489  	r.Seek(int64(len), seekCurrent)
   490  	return len
   491  }
   492  
   493  func parseBERaw16(r *bytes.Reader, v *uint16) {
   494  	binary.Read(r, binary.BigEndian, v)
   495  }
   496  
   497  func parseBERaw32(r *bytes.Reader, v *uint32) {
   498  	binary.Read(r, binary.BigEndian, v)
   499  }
   500  
   501  func parseBERaw64(r *bytes.Reader, v *uint64) {
   502  	binary.Read(r, binary.BigEndian, v)
   503  }
   504  
   505  func parseRaw32(r *bytes.Reader, v *uint32) {
   506  	binary.Read(r, nl.NativeEndian(), v)
   507  }
   508  
   509  func parseByteAndPacketCounters(r *bytes.Reader) (bytes, packets uint64) {
   510  	for i := 0; i < 2; i++ {
   511  		switch _, t, _ := parseNfAttrTL(r); t {
   512  		case nl.CTA_COUNTERS_BYTES:
   513  			parseBERaw64(r, &bytes)
   514  		case nl.CTA_COUNTERS_PACKETS:
   515  			parseBERaw64(r, &packets)
   516  		default:
   517  			return
   518  		}
   519  	}
   520  	return
   521  }
   522  
   523  // when the flow is alive, only the timestamp_start is returned in structure
   524  func parseTimeStamp(r *bytes.Reader, readSize uint16) (tstart, tstop uint64) {
   525  	var numTimeStamps int
   526  	oneItem := nl.SizeofNfattr + 8 // 4 bytes attr header + 8 bytes timestamp
   527  	if readSize == uint16(oneItem) {
   528  		numTimeStamps = 1
   529  	} else if readSize == 2*uint16(oneItem) {
   530  		numTimeStamps = 2
   531  	} else {
   532  		return
   533  	}
   534  	for i := 0; i < numTimeStamps; i++ {
   535  		switch _, t, _ := parseNfAttrTL(r); t {
   536  		case nl.CTA_TIMESTAMP_START:
   537  			parseBERaw64(r, &tstart)
   538  		case nl.CTA_TIMESTAMP_STOP:
   539  			parseBERaw64(r, &tstop)
   540  		default:
   541  			return
   542  		}
   543  	}
   544  	return
   545  
   546  }
   547  
   548  func parseProtoInfoTCPState(r *bytes.Reader) (s uint8) {
   549  	binary.Read(r, binary.BigEndian, &s)
   550  	r.Seek(nl.SizeofNfattr - 1, seekCurrent)
   551  	return s
   552  }
   553  
   554  // parseProtoInfoTCP reads the entire nested protoinfo structure, but only parses the state attr.
   555  func parseProtoInfoTCP(r *bytes.Reader, attrLen uint16) (*ProtoInfoTCP) {
   556  	p := new(ProtoInfoTCP)
   557  	bytesRead := 0
   558  	for bytesRead < int(attrLen) {
   559  		_, t, l := parseNfAttrTL(r)
   560  		bytesRead += nl.SizeofNfattr
   561  
   562  		switch t {
   563  		case nl.CTA_PROTOINFO_TCP_STATE:
   564  			p.State = parseProtoInfoTCPState(r)
   565  			bytesRead += nl.SizeofNfattr
   566  		default:
   567  			bytesRead += int(skipNfAttrValue(r, l))
   568  		}
   569  	}
   570  
   571  	return p
   572  }
   573  
   574  func parseProtoInfo(r *bytes.Reader, attrLen uint16) (p ProtoInfo) {
   575  	bytesRead := 0
   576  	for bytesRead < int(attrLen) {
   577  		_, t, l := parseNfAttrTL(r)
   578  		bytesRead += nl.SizeofNfattr
   579  
   580  		switch t {
   581  		case nl.CTA_PROTOINFO_TCP:
   582  			p = parseProtoInfoTCP(r, l)
   583  			bytesRead += int(l)
   584  		// No inner fields of DCCP / SCTP currently supported.
   585  		case nl.CTA_PROTOINFO_DCCP:
   586  			p = new(ProtoInfoDCCP)
   587  			skipped := skipNfAttrValue(r, l)
   588  			bytesRead += int(skipped)
   589  		case nl.CTA_PROTOINFO_SCTP:
   590  			p = new(ProtoInfoSCTP)
   591  			skipped := skipNfAttrValue(r, l)
   592  			bytesRead += int(skipped)
   593  		default:
   594  			skipped := skipNfAttrValue(r, l)
   595  			bytesRead += int(skipped)
   596  		}
   597  	}
   598  
   599  	return p
   600  }
   601  
   602  func parseTimeOut(r *bytes.Reader) (ttimeout uint32) {
   603  	parseBERaw32(r, &ttimeout)
   604  	return
   605  }
   606  
   607  func parseConnectionMark(r *bytes.Reader) (mark uint32) {
   608  	parseBERaw32(r, &mark)
   609  	return
   610  }
   611  
   612  func parseConnectionLabels(r *bytes.Reader) (label []byte) {
   613  	label = make([]byte, 16) // netfilter defines 128 bit labels value
   614  	binary.Read(r, nl.NativeEndian(), &label)
   615  	return
   616  }
   617  
   618  func parseConnectionZone(r *bytes.Reader) (zone uint16) {
   619  	parseBERaw16(r, &zone)
   620  	r.Seek(2, seekCurrent)
   621  	return
   622  }
   623  
   624  func parseRawData(data []byte) *ConntrackFlow {
   625  	s := &ConntrackFlow{}
   626  	// First there is the Nfgenmsg header
   627  	// consume only the family field
   628  	reader := bytes.NewReader(data)
   629  	binary.Read(reader, nl.NativeEndian(), &s.FamilyType)
   630  
   631  	// skip rest of the Netfilter header
   632  	reader.Seek(3, seekCurrent)
   633  	// The message structure is the following:
   634  	// <len, NLA_F_NESTED|CTA_TUPLE_ORIG> 4 bytes
   635  	// <len, NLA_F_NESTED|CTA_TUPLE_IP> 4 bytes
   636  	// flow information of the forward flow
   637  	// <len, NLA_F_NESTED|CTA_TUPLE_REPLY> 4 bytes
   638  	// <len, NLA_F_NESTED|CTA_TUPLE_IP> 4 bytes
   639  	// flow information of the reverse flow
   640  	for reader.Len() > 0 {
   641  		if nested, t, l := parseNfAttrTL(reader); nested {
   642  			switch t {
   643  			case nl.CTA_TUPLE_ORIG:
   644  				if nested, t, l = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP {
   645  					parseIpTuple(reader, &s.Forward)
   646  				}
   647  			case nl.CTA_TUPLE_REPLY:
   648  				if nested, t, l = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP {
   649  					parseIpTuple(reader, &s.Reverse)
   650  				} else {
   651  					// Header not recognized skip it
   652  					skipNfAttrValue(reader, l)
   653  				}
   654  			case nl.CTA_COUNTERS_ORIG:
   655  				s.Forward.Bytes, s.Forward.Packets = parseByteAndPacketCounters(reader)
   656  			case nl.CTA_COUNTERS_REPLY:
   657  				s.Reverse.Bytes, s.Reverse.Packets = parseByteAndPacketCounters(reader)
   658  			case nl.CTA_TIMESTAMP:
   659  				s.TimeStart, s.TimeStop = parseTimeStamp(reader, l)
   660  			case nl.CTA_PROTOINFO:
   661  				s.ProtoInfo = parseProtoInfo(reader, l)
   662  			default:
   663  				skipNfAttrValue(reader, l)
   664  			}
   665  		} else {
   666  			switch t {
   667  			case nl.CTA_MARK:
   668  				s.Mark = parseConnectionMark(reader)
   669  				case nl.CTA_LABELS:
   670  				s.Labels = parseConnectionLabels(reader)
   671  			case nl.CTA_TIMEOUT:
   672  				s.TimeOut = parseTimeOut(reader)
   673  			case nl.CTA_ID, nl.CTA_STATUS, nl.CTA_USE:
   674  				skipNfAttrValue(reader, l)
   675  			case nl.CTA_ZONE:
   676  				s.Zone = parseConnectionZone(reader)
   677  			default:
   678  				skipNfAttrValue(reader, l)
   679  			}
   680  		}
   681  	}
   682  	return s
   683  }
   684  
   685  // Conntrack parameters and options:
   686  //   -n, --src-nat ip                      source NAT ip
   687  //   -g, --dst-nat ip                      destination NAT ip
   688  //   -j, --any-nat ip                      source or destination NAT ip
   689  //   -m, --mark mark                       Set mark
   690  //   -c, --secmark secmark                 Set selinux secmark
   691  //   -e, --event-mask eventmask            Event mask, eg. NEW,DESTROY
   692  //   -z, --zero                            Zero counters while listing
   693  //   -o, --output type[,...]               Output format, eg. xml
   694  //   -l, --label label[,...]               conntrack labels
   695  
   696  // Common parameters and options:
   697  //   -s, --src, --orig-src ip              Source address from original direction
   698  //   -d, --dst, --orig-dst ip              Destination address from original direction
   699  //   -r, --reply-src ip            Source address from reply direction
   700  //   -q, --reply-dst ip            Destination address from reply direction
   701  //   -p, --protonum proto          Layer 4 Protocol, eg. 'tcp'
   702  //   -f, --family proto            Layer 3 Protocol, eg. 'ipv6'
   703  //   -t, --timeout timeout         Set timeout
   704  //   -u, --status status           Set status, eg. ASSURED
   705  //   -w, --zone value              Set conntrack zone
   706  //   --orig-zone value             Set zone for original direction
   707  //   --reply-zone value            Set zone for reply direction
   708  //   -b, --buffer-size             Netlink socket buffer size
   709  //   --mask-src ip                 Source mask address
   710  //   --mask-dst ip                 Destination mask address
   711  
   712  // Layer 4 Protocol common parameters and options:
   713  // TCP, UDP, SCTP, UDPLite and DCCP
   714  //    --sport, --orig-port-src port    Source port in original direction
   715  //    --dport, --orig-port-dst port    Destination port in original direction
   716  
// ConntrackFilterType identifies which attribute of a flow a filter criterion
// applies to. It is the key type of the criteria maps in ConntrackFilter.
type ConntrackFilterType uint8
   719  
// Filter criteria selectable when configuring a ConntrackFilter.
const (
	ConntrackOrigSrcIP     = iota                // --orig-src ip   Source address from original direction
	ConntrackOrigDstIP                           // --orig-dst ip   Destination address from original direction
	ConntrackReplySrcIP                          // --reply-src ip  Reply Source IP
	ConntrackReplyDstIP                          // --reply-dst ip  Reply Destination IP
	ConntrackReplyAnyIP                          // Match source or destination reply IP
	ConntrackOrigSrcPort                         // --orig-port-src port    Source port in original direction
	ConntrackOrigDstPort                         // --orig-port-dst port    Destination port in original direction
	ConntrackMatchLabels                         // --label label1,label2   Labels used in entry
	ConntrackUnmatchLabels                       // --label label1,label2   Labels not used in entry
	ConntrackNatSrcIP      = ConntrackReplySrcIP // Deprecated: use ConntrackReplySrcIP instead.
	ConntrackNatDstIP      = ConntrackReplyDstIP // Deprecated: use ConntrackReplyDstIP instead.
	ConntrackNatAnyIP      = ConntrackReplyAnyIP // Deprecated: use ConntrackReplyAnyIP instead.
)
   734  
// CustomConntrackFilter is implemented by anything that can decide whether a
// flow should be selected. The delete operations call it once per dumped flow.
type CustomConntrackFilter interface {
	// MatchConntrackFlow applies the filter to the flow and returns true if the flow matches
	// the filter or false otherwise
	MatchConntrackFlow(flow *ConntrackFlow) bool
}
   740  
// ConntrackFilter collects the criteria configured through its Add* methods.
// The zero value is ready to use; the maps are allocated on first use.
type ConntrackFilter struct {
	// ipNetFilter holds at most one subnet per filter type (see AddIPNet, AddIP).
	ipNetFilter map[ConntrackFilterType]*net.IPNet
	// portFilter holds at most one port per filter type (see AddPort).
	portFilter map[ConntrackFilterType]uint16
	// protoFilter is the Layer 4 protocol number; zero means it has not been set.
	protoFilter uint8
	// labelFilter holds label criteria per filter type.
	// NOTE(review): its setter is outside this part of the file — confirm semantics there.
	labelFilter map[ConntrackFilterType][][]byte
	// zoneFilter is the zone criterion; presumably nil means "no zone filter" — TODO confirm.
	zoneFilter *uint16
}
   748  
   749  // AddIPNet adds a IP subnet to the conntrack filter
   750  func (f *ConntrackFilter) AddIPNet(tp ConntrackFilterType, ipNet *net.IPNet) error {
   751  	if ipNet == nil {
   752  		return fmt.Errorf("Filter attribute empty")
   753  	}
   754  	if f.ipNetFilter == nil {
   755  		f.ipNetFilter = make(map[ConntrackFilterType]*net.IPNet)
   756  	}
   757  	if _, ok := f.ipNetFilter[tp]; ok {
   758  		return errors.New("Filter attribute already present")
   759  	}
   760  	f.ipNetFilter[tp] = ipNet
   761  	return nil
   762  }
   763  
   764  // AddIP adds an IP to the conntrack filter
   765  func (f *ConntrackFilter) AddIP(tp ConntrackFilterType, ip net.IP) error {
   766  	if ip == nil {
   767  		return fmt.Errorf("Filter attribute empty")
   768  	}
   769  	return f.AddIPNet(tp, NewIPNet(ip))
   770  }
   771  
   772  // AddPort adds a Port to the conntrack filter if the Layer 4 protocol allows it
   773  func (f *ConntrackFilter) AddPort(tp ConntrackFilterType, port uint16) error {
   774  	switch f.protoFilter {
   775  	// TCP, UDP, DCCP, SCTP, UDPLite
   776  	case 6, 17, 33, 132, 136:
   777  	default:
   778  		return fmt.Errorf("Filter attribute not available without a valid Layer 4 protocol: %d", f.protoFilter)
   779  	}
   780  
   781  	if f.portFilter == nil {
   782  		f.portFilter = make(map[ConntrackFilterType]uint16)
   783  	}
   784  	if _, ok := f.portFilter[tp]; ok {
   785  		return errors.New("Filter attribute already present")
   786  	}
   787  	f.portFilter[tp] = port
   788  	return nil
   789  }
   790  
   791  // AddProtocol adds the Layer 4 protocol to the conntrack filter
   792  func (f *ConntrackFilter) AddProtocol(proto uint8) error {
   793  	if f.protoFilter != 0 {
   794  		return errors.New("Filter attribute already present")
   795  	}
   796  	f.protoFilter = proto
   797  	return nil
   798  }
   799  
   800  // AddLabels adds the provided list (zero or more) of labels to the conntrack filter
   801  // ConntrackFilterType here can be either:
   802  //  1. ConntrackMatchLabels: This matches every flow that has a label value (len(flow.Labels) > 0)
   803  //     against the list of provided labels. If `flow.Labels` contains ALL the provided labels
   804  //     it is considered a match. This can be used when you want to match flows that contain
   805  //     one or more labels.
   806  //  2. ConntrackUnmatchLabels:  This matches every flow that has a label value (len(flow.Labels) > 0)
   807  //     against the list of provided labels. If `flow.Labels` does NOT contain ALL the provided labels
   808  //     it is considered a match. This can be used when you want to match flows that don't contain
   809  //     one or more labels.
   810  func (f *ConntrackFilter) AddLabels(tp ConntrackFilterType, labels [][]byte) error {
   811  	if len(labels) == 0 {
   812  		return errors.New("Invalid length for provided labels")
   813  	}
   814  	if f.labelFilter == nil {
   815  		f.labelFilter = make(map[ConntrackFilterType][][]byte)
   816  	}
   817  	if _, ok := f.labelFilter[tp]; ok {
   818  		return errors.New("Filter attribute already present")
   819  	}
   820  	f.labelFilter[tp] = labels
   821  	return nil
   822  }
   823  
   824  // AddZone adds a zone to the conntrack filter
   825  func (f *ConntrackFilter) AddZone(zone uint16) error {
   826  	if f.zoneFilter != nil {
   827  		return errors.New("Filter attribute already present")
   828  	}
   829  	f.zoneFilter = &zone
   830  	return nil
   831  }
   832  
   833  // MatchConntrackFlow applies the filter to the flow and returns true if the flow matches the filter
   834  // false otherwise
   835  func (f *ConntrackFilter) MatchConntrackFlow(flow *ConntrackFlow) bool {
   836  	if len(f.ipNetFilter) == 0 && len(f.portFilter) == 0 && f.protoFilter == 0 && len(f.labelFilter) == 0 && f.zoneFilter == nil {
   837  		// empty filter always not match
   838  		return false
   839  	}
   840  
   841  	// -p, --protonum proto          Layer 4 Protocol, eg. 'tcp'
   842  	if f.protoFilter != 0 && flow.Forward.Protocol != f.protoFilter {
   843  		// different Layer 4 protocol always not match
   844  		return false
   845  	}
   846  
   847  	// Conntrack zone filter
   848  	if f.zoneFilter != nil && *f.zoneFilter != flow.Zone {
   849  		return false
   850  	}
   851  
   852  	match := true
   853  
   854  	// IP conntrack filter
   855  	if len(f.ipNetFilter) > 0 {
   856  		// -orig-src ip   Source address from original direction
   857  		if elem, found := f.ipNetFilter[ConntrackOrigSrcIP]; found {
   858  			match = match && elem.Contains(flow.Forward.SrcIP)
   859  		}
   860  
   861  		// -orig-dst ip   Destination address from original direction
   862  		if elem, found := f.ipNetFilter[ConntrackOrigDstIP]; match && found {
   863  			match = match && elem.Contains(flow.Forward.DstIP)
   864  		}
   865  
   866  		// -src-nat ip    Source NAT ip
   867  		if elem, found := f.ipNetFilter[ConntrackReplySrcIP]; match && found {
   868  			match = match && elem.Contains(flow.Reverse.SrcIP)
   869  		}
   870  
   871  		// -dst-nat ip    Destination NAT ip
   872  		if elem, found := f.ipNetFilter[ConntrackReplyDstIP]; match && found {
   873  			match = match && elem.Contains(flow.Reverse.DstIP)
   874  		}
   875  
   876  		// Match source or destination reply IP
   877  		if elem, found := f.ipNetFilter[ConntrackReplyAnyIP]; match && found {
   878  			match = match && (elem.Contains(flow.Reverse.SrcIP) || elem.Contains(flow.Reverse.DstIP))
   879  		}
   880  	}
   881  
   882  	// Layer 4 Port filter
   883  	if len(f.portFilter) > 0 {
   884  		// -orig-port-src port	Source port from original direction
   885  		if elem, found := f.portFilter[ConntrackOrigSrcPort]; match && found {
   886  			match = match && elem == flow.Forward.SrcPort
   887  		}
   888  
   889  		// -orig-port-dst port	Destination port from original direction
   890  		if elem, found := f.portFilter[ConntrackOrigDstPort]; match && found {
   891  			match = match && elem == flow.Forward.DstPort
   892  		}
   893  	}
   894  
   895  	// Label filter
   896  	if len(f.labelFilter) > 0 {
   897  		if len(flow.Labels) > 0 {
   898  			// --label label1,label2 in conn entry;
   899  			// every label passed should be contained in flow.Labels for a match to be true
   900  			if elem, found := f.labelFilter[ConntrackMatchLabels]; match && found {
   901  				for _, label := range elem {
   902  					match = match && (bytes.Contains(flow.Labels, label))
   903  				}
   904  			}
   905  			// --label label1,label2 in conn entry;
   906  			// every label passed should be not contained in flow.Labels for a match to be true
   907  			if elem, found := f.labelFilter[ConntrackUnmatchLabels]; match && found {
   908  				for _, label := range elem {
   909  					match = match && !(bytes.Contains(flow.Labels, label))
   910  				}
   911  			}
   912  		} else {
   913  			// flow doesn't contain labels, so it doesn't contain or notContain any provided matches
   914  			match = false
   915  		}
   916  	}
   917  
   918  	return match
   919  }
   920  
// Compile-time assertion that *ConntrackFilter satisfies CustomConntrackFilter.
var _ CustomConntrackFilter = (*ConntrackFilter)(nil)