k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/proxy/util/nfacct/nfacct_linux.go (about)

     1  //go:build linux
     2  // +build linux
     3  
     4  /*
     5  Copyright 2024 The Kubernetes Authors.
     6  
     7  Licensed under the Apache License, Version 2.0 (the "License");
     8  you may not use this file except in compliance with the License.
     9  You may obtain a copy of the License at
    10  
    11      http://www.apache.org/licenses/LICENSE-2.0
    12  
    13  Unless required by applicable law or agreed to in writing, software
    14  distributed under the License is distributed on an "AS IS" BASIS,
    15  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    16  See the License for the specific language governing permissions and
    17  limitations under the License.
    18  */
    19  
    20  package nfacct
    21  
    22  import (
    23  	"bytes"
    24  	"encoding/binary"
    25  	"errors"
    26  	"fmt"
    27  	"io"
    28  	"syscall"
    29  
    30  	"github.com/vishvananda/netlink/nl"
    31  	"golang.org/x/sys/unix"
    32  )
    33  
// MaxLength represents the maximum length allowed for the name in a nfacct counter.
// The limit is checked against len(name), i.e. it is counted in bytes and does not
// include the terminator appended by nl.ZeroTerminated when the name is sent; Add and
// Get reject longer names with ErrNameExceedsMaxLength.
const MaxLength = 31
    36  
// nf netlink nfacct commands, these should strictly match with the ones defined in kernel headers.
// (definition: https://github.com/torvalds/linux/blob/v6.7/include/uapi/linux/netfilter/nfnetlink_acct.h#L9-L16)
const (
	// cmdNew corresponds to NFNL_MSG_ACCT_NEW; Add uses it to create a counter.
	cmdNew = 0
	// cmdGet corresponds to NFNL_MSG_ACCT_GET; Get uses it to fetch a single counter by name
	// and List uses it together with NLM_F_DUMP to fetch every counter.
	cmdGet = 1
)
    45  
// nf netlink nfacct attribute, these should strictly match with the ones defined in kernel headers.
// (definition: https://github.com/torvalds/linux/blob/v6.7/include/uapi/linux/netfilter/nfnetlink_acct.h#L24-L35)
const (
	// attrName corresponds to NFACCT_NAME; it carries the zero-terminated counter name in
	// requests and is decoded into Counter.Name.
	attrName = 1
	// attrPackets corresponds to NFACCT_PKTS; it is decoded as a big-endian 64-bit value into
	// Counter.Packets.
	attrPackets = 2
	// attrBytes corresponds to NFACCT_BYTES; it is decoded as a big-endian 64-bit value into
	// Counter.Bytes.
	attrBytes = 3
)
    56  
// runner implements the Interface and depends on the handler for execution.
// Every operation builds its netlink request through handler.newRequest, which is what
// allows newInternal to construct a runner around any handler implementation.
type runner struct {
	// handler creates the requests executed by Add, Get and List.
	handler handler
}
    61  
    62  // New returns a new Interface.
    63  func New() (Interface, error) {
    64  	hndlr, err := newNetlinkHandler()
    65  	if err != nil {
    66  		return nil, err
    67  	}
    68  	return newInternal(hndlr)
    69  }
    70  
    71  // newInternal returns a new Interface with the given handler.
    72  func newInternal(hndlr handler) (Interface, error) {
    73  	return &runner{handler: hndlr}, nil
    74  
    75  }
    76  
    77  // Ensure is part of the interface.
    78  func (r *runner) Ensure(name string) error {
    79  	counter, err := r.Get(name)
    80  	if counter != nil {
    81  		return nil
    82  	}
    83  
    84  	if err != nil && errors.Is(err, ErrObjectNotFound) {
    85  		return handleError(r.Add(name))
    86  	} else if err != nil {
    87  		return handleError(err)
    88  	} else {
    89  		return ErrUnexpected
    90  	}
    91  }
    92  
    93  // Add is part of the interface.
    94  func (r *runner) Add(name string) error {
    95  	if name == "" {
    96  		return ErrEmptyName
    97  	}
    98  	if len(name) > MaxLength {
    99  		return ErrNameExceedsMaxLength
   100  	}
   101  
   102  	req := r.handler.newRequest(cmdNew, unix.NLM_F_REQUEST|unix.NLM_F_CREATE|unix.NLM_F_ACK)
   103  	req.AddData(nl.NewRtAttr(attrName, nl.ZeroTerminated(name)))
   104  	_, err := req.Execute(unix.NETLINK_NETFILTER, 0)
   105  	if err != nil {
   106  		return handleError(err)
   107  	}
   108  	return nil
   109  }
   110  
   111  // Get is part of the interface.
   112  func (r *runner) Get(name string) (*Counter, error) {
   113  	if len(name) > MaxLength {
   114  		return nil, ErrNameExceedsMaxLength
   115  	}
   116  
   117  	req := r.handler.newRequest(cmdGet, unix.NLM_F_REQUEST|unix.NLM_F_ACK)
   118  	req.AddData(nl.NewRtAttr(attrName, nl.ZeroTerminated(name)))
   119  	msgs, err := req.Execute(unix.NETLINK_NETFILTER, 0)
   120  	if err != nil {
   121  		return nil, handleError(err)
   122  	}
   123  
   124  	var counter *Counter
   125  	for _, msg := range msgs {
   126  		counter, err = decode(msg, true)
   127  		if err != nil {
   128  			return nil, handleError(err)
   129  		}
   130  	}
   131  	return counter, nil
   132  }
   133  
   134  // List is part of the interface.
   135  func (r *runner) List() ([]*Counter, error) {
   136  	req := r.handler.newRequest(cmdGet, unix.NLM_F_REQUEST|unix.NLM_F_DUMP)
   137  	msgs, err := req.Execute(unix.NETLINK_NETFILTER, 0)
   138  	if err != nil {
   139  		return nil, handleError(err)
   140  	}
   141  
   142  	counters := make([]*Counter, 0)
   143  	for _, msg := range msgs {
   144  		counter, err := decode(msg, true)
   145  		if err != nil {
   146  			return nil, handleError(err)
   147  		}
   148  		counters = append(counters, counter)
   149  	}
   150  	return counters, nil
   151  }
   152  
   153  var ErrObjectNotFound = errors.New("object not found")
   154  var ErrObjectAlreadyExists = errors.New("object already exists")
   155  var ErrNameExceedsMaxLength = fmt.Errorf("object name exceeds the maximum allowed length of %d characters", MaxLength)
   156  var ErrEmptyName = errors.New("object name cannot be empty")
   157  var ErrUnexpected = errors.New("unexpected error")
   158  
   159  func handleError(err error) error {
   160  	switch {
   161  	case err == nil:
   162  		return nil
   163  	case errors.Is(err, syscall.ENOENT):
   164  		return ErrObjectNotFound
   165  	case errors.Is(err, syscall.EBUSY):
   166  		return ErrObjectAlreadyExists
   167  	default:
   168  		return fmt.Errorf("%s: %s", ErrUnexpected.Error(), err.Error())
   169  	}
   170  }
   171  
   172  // decode function processes a byte stream, requiring the 'strict' parameter to be true in production and
   173  // false only for testing purposes. If in strict mode and any of the relevant attributes (name, packets, or bytes)
   174  // have not been processed, an error is returned indicating a failure to decode the byte stream.
   175  //
   176  // Parse the netlink message as per the documentation outlined in:
   177  // https://docs.kernel.org/userspace-api/netlink/intro.html
   178  //
   179  // Message Components:
   180  //   - netfilter generic message [4 bytes]
   181  //     struct nfgenmsg (definition: https://github.com/torvalds/linux/blob/v6.7/include/uapi/linux/netfilter/nfnetlink.h#L32-L38)
   182  //   - attributes [variable-sized, must align to 4 bytes from the start of attribute]
   183  //     struct nlattr (definition: https://github.com/torvalds/linux/blob/v6.7/include/uapi/linux/netlink.h#L220-L232)
   184  //
   185  // Attribute Components:
   186  //   - length [2 bytes]
   187  //     length includes bytes for defining the length itself, bytes for defining the type,
   188  //     and the actual bytes of data without any padding.
   189  //   - type [2 bytes]
   190  //   - data [variable-sized]
   191  //   - padding [optional]
   192  //
   193  // Example. Counter{Name: "dummy-metric", Packets: 123, Bytes: 54321} in netlink message:
   194  //
   195  //	struct nfgenmsg{
   196  //	    __u8  nfgen_family: AF_NETLINK
   197  //	    __u8    version:    nl.NFNETLINK_V0
   198  //	    __be16  res_id:     nl.NFNETLINK_V0
   199  //	}
   200  //
   201  //	struct nlattr{
   202  //	    __u16 nla_len:      13
   203  //	    __u16 nla_type:     NFACCT_NAME
   204  //	    char data:          dummy-metric\0
   205  //	}
   206  //
   207  //	(padding:)
   208  //	    data:               \0\0\0
   209  //
   210  //	struct nlattr{
   211  //	    __u16 nla_len:      12
   212  //	    __u16 nla_type:     NFACCT_PKTS
   213  //	    __u64: data:        123
   214  //	}
   215  //
   216  //	struct nlattr{
   217  //	    __u16 nla_len:      12
   218  //	    __u16 nla_type:     NFACCT_BYTES
   219  //	    __u64: data:        54321
   220  //	}
   221  func decode(msg []byte, strict bool) (*Counter, error) {
   222  	counter := &Counter{}
   223  	reader := bytes.NewReader(msg)
   224  	// skip the first 4 bytes (netfilter generic message).
   225  	if _, err := reader.Seek(nl.SizeofNfgenmsg, io.SeekCurrent); err != nil {
   226  		return nil, err
   227  	}
   228  
   229  	// attrsProcessed tracks the number of processed attributes.
   230  	var attrsProcessed int
   231  
   232  	// length and type of netlink attribute.
   233  	var length, attrType uint16
   234  
   235  	// now we are just left with the attributes(struct nlattr) after skipping netlink generic
   236  	// message; we iterate over all the attributes one by one to construct our Counter object.
   237  	for reader.Len() > 0 {
   238  		// netlink attributes are in LTV(length, type and value) format.
   239  
   240  		// STEP 1. parse length [2 bytes]
   241  		if err := binary.Read(reader, binary.NativeEndian, &length); err != nil {
   242  			return nil, err
   243  		}
   244  
   245  		// STEP 2. parse type   [2 bytes]
   246  		if err := binary.Read(reader, binary.NativeEndian, &attrType); err != nil {
   247  			return nil, err
   248  		}
   249  
   250  		// STEP 3. adjust the length
   251  		// adjust the length to consider the header bytes read in step(1) and step(2); the actual
   252  		// length of data will be 4 bytes less than the originally read value.
   253  		length -= 4
   254  
   255  		// STEP 4. parse value  [variable sized]
   256  		// The value can assume any data-type. To read it into the appropriate data structure, we need
   257  		// to know the data type in advance. We achieve this by switching on the attribute-type, and we
   258  		// allocate the 'adjusted length' bytes (as done in step(3)) for the data-structure.
   259  		switch attrType {
   260  		case attrName:
   261  			// NFACCT_NAME has a variable size, so we allocate a slice of 'adjusted length' bytes
   262  			// and read the next 'adjusted length' bytes into this slice.
   263  			data := make([]byte, length)
   264  			if err := binary.Read(reader, binary.NativeEndian, data); err != nil {
   265  				return nil, err
   266  			}
   267  			counter.Name = string(data[:length-1])
   268  			attrsProcessed++
   269  		case attrPackets:
   270  			// NFACCT_PKTS holds 8 bytes of data, so we directly read the next 8 bytes into a 64-bit
   271  			// unsigned integer (counter.Packets).
   272  			if err := binary.Read(reader, binary.BigEndian, &counter.Packets); err != nil {
   273  				return nil, err
   274  			}
   275  			attrsProcessed++
   276  		case attrBytes:
   277  			// NFACCT_BYTES holds 8 bytes of data, so we directly read the next 8 bytes into a 64-bit
   278  			// unsigned integer (counter.Bytes).
   279  			if err := binary.Read(reader, binary.BigEndian, &counter.Bytes); err != nil {
   280  				return nil, err
   281  			}
   282  			attrsProcessed++
   283  		default:
   284  			// skip the data part for unknown attribute
   285  			if _, err := reader.Seek(int64(length), io.SeekCurrent); err != nil {
   286  				return nil, err
   287  			}
   288  		}
   289  
   290  		// Move past the padding to align with the fixed-size length, always a multiple of 4.
   291  		// If, for instance, the length is 9, skip 3 bytes of padding to reach the start of
   292  		// the next attribute.
   293  		// (ref: https://github.com/torvalds/linux/blob/v6.7/include/uapi/linux/netlink.h#L220-L227)
   294  		if length%4 != 0 {
   295  			padding := 4 - length%4
   296  			if _, err := reader.Seek(int64(padding), io.SeekCurrent); err != nil {
   297  				return nil, err
   298  			}
   299  		}
   300  	}
   301  
   302  	// return err if any of the required attribute is not processed.
   303  	if strict && attrsProcessed != 3 {
   304  		return nil, errors.New("failed to decode byte-stream")
   305  	}
   306  	return counter, nil
   307  }