k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/proxy/util/nfacct/nfacct_linux.go (about) 1 //go:build linux 2 // +build linux 3 4 /* 5 Copyright 2024 The Kubernetes Authors. 6 7 Licensed under the Apache License, Version 2.0 (the "License"); 8 you may not use this file except in compliance with the License. 9 You may obtain a copy of the License at 10 11 http://www.apache.org/licenses/LICENSE-2.0 12 13 Unless required by applicable law or agreed to in writing, software 14 distributed under the License is distributed on an "AS IS" BASIS, 15 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 See the License for the specific language governing permissions and 17 limitations under the License. 18 */ 19 20 package nfacct 21 22 import ( 23 "bytes" 24 "encoding/binary" 25 "errors" 26 "fmt" 27 "io" 28 "syscall" 29 30 "github.com/vishvananda/netlink/nl" 31 "golang.org/x/sys/unix" 32 ) 33 34 // MaxLength represents the maximum length allowed for the name in a nfacct counter. 35 const MaxLength = 31 36 37 // nf netlink nfacct commands, these should strictly match with the ones defined in kernel headers. 38 // (definition: https://github.com/torvalds/linux/blob/v6.7/include/uapi/linux/netfilter/nfnetlink_acct.h#L9-L16) 39 const ( 40 // NFNL_MSG_ACCT_NEW 41 cmdNew = 0 42 // NFNL_MSG_ACCT_GET 43 cmdGet = 1 44 ) 45 46 // nf netlink nfacct attribute, these should strictly match with the ones defined in kernel headers. 47 // (definition: https://github.com/torvalds/linux/blob/v6.7/include/uapi/linux/netfilter/nfnetlink_acct.h#L24-L35) 48 const ( 49 // NFACCT_NAME 50 attrName = 1 51 // NFACCT_PKTS 52 attrPackets = 2 53 // NFACCT_BYTES 54 attrBytes = 3 55 ) 56 57 // runner implements the Interface and depends on the handler for execution. 58 type runner struct { 59 handler handler 60 } 61 62 // New returns a new Interface. 63 func New() (Interface, error) { 64 hndlr, err := newNetlinkHandler() 65 if err != nil { 66 return nil, err 67 } 68 return newInternal(hndlr) 69 } 70 71 // newInternal returns a new Interface with the given handler. 72 func newInternal(hndlr handler) (Interface, error) { 73 return &runner{handler: hndlr}, nil 74 75 } 76 77 // Ensure is part of the interface. 78 func (r *runner) Ensure(name string) error { 79 counter, err := r.Get(name) 80 if counter != nil { 81 return nil 82 } 83 84 if err != nil && errors.Is(err, ErrObjectNotFound) { 85 return handleError(r.Add(name)) 86 } else if err != nil { 87 return handleError(err) 88 } else { 89 return ErrUnexpected 90 } 91 } 92 93 // Add is part of the interface. 94 func (r *runner) Add(name string) error { 95 if name == "" { 96 return ErrEmptyName 97 } 98 if len(name) > MaxLength { 99 return ErrNameExceedsMaxLength 100 } 101 102 req := r.handler.newRequest(cmdNew, unix.NLM_F_REQUEST|unix.NLM_F_CREATE|unix.NLM_F_ACK) 103 req.AddData(nl.NewRtAttr(attrName, nl.ZeroTerminated(name))) 104 _, err := req.Execute(unix.NETLINK_NETFILTER, 0) 105 if err != nil { 106 return handleError(err) 107 } 108 return nil 109 } 110 111 // Get is part of the interface. 112 func (r *runner) Get(name string) (*Counter, error) { 113 if len(name) > MaxLength { 114 return nil, ErrNameExceedsMaxLength 115 } 116 117 req := r.handler.newRequest(cmdGet, unix.NLM_F_REQUEST|unix.NLM_F_ACK) 118 req.AddData(nl.NewRtAttr(attrName, nl.ZeroTerminated(name))) 119 msgs, err := req.Execute(unix.NETLINK_NETFILTER, 0) 120 if err != nil { 121 return nil, handleError(err) 122 } 123 124 var counter *Counter 125 for _, msg := range msgs { 126 counter, err = decode(msg, true) 127 if err != nil { 128 return nil, handleError(err) 129 } 130 } 131 return counter, nil 132 } 133 134 // List is part of the interface. 135 func (r *runner) List() ([]*Counter, error) { 136 req := r.handler.newRequest(cmdGet, unix.NLM_F_REQUEST|unix.NLM_F_DUMP) 137 msgs, err := req.Execute(unix.NETLINK_NETFILTER, 0) 138 if err != nil { 139 return nil, handleError(err) 140 } 141 142 counters := make([]*Counter, 0) 143 for _, msg := range msgs { 144 counter, err := decode(msg, true) 145 if err != nil { 146 return nil, handleError(err) 147 } 148 counters = append(counters, counter) 149 } 150 return counters, nil 151 } 152 153 var ErrObjectNotFound = errors.New("object not found") 154 var ErrObjectAlreadyExists = errors.New("object already exists") 155 var ErrNameExceedsMaxLength = fmt.Errorf("object name exceeds the maximum allowed length of %d characters", MaxLength) 156 var ErrEmptyName = errors.New("object name cannot be empty") 157 var ErrUnexpected = errors.New("unexpected error") 158 159 func handleError(err error) error { 160 switch { 161 case err == nil: 162 return nil 163 case errors.Is(err, syscall.ENOENT): 164 return ErrObjectNotFound 165 case errors.Is(err, syscall.EBUSY): 166 return ErrObjectAlreadyExists 167 default: 168 return fmt.Errorf("%s: %s", ErrUnexpected.Error(), err.Error()) 169 } 170 } 171 172 // decode function processes a byte stream, requiring the 'strict' parameter to be true in production and 173 // false only for testing purposes. If in strict mode and any of the relevant attributes (name, packets, or bytes) 174 // have not been processed, an error is returned indicating a failure to decode the byte stream. 175 // 176 // Parse the netlink message as per the documentation outlined in: 177 // https://docs.kernel.org/userspace-api/netlink/intro.html 178 // 179 // Message Components: 180 // - netfilter generic message [4 bytes] 181 // struct nfgenmsg (definition: https://github.com/torvalds/linux/blob/v6.7/include/uapi/linux/netfilter/nfnetlink.h#L32-L38) 182 // - attributes [variable-sized, must align to 4 bytes from the start of attribute] 183 // struct nlattr (definition: https://github.com/torvalds/linux/blob/v6.7/include/uapi/linux/netlink.h#L220-L232) 184 // 185 // Attribute Components: 186 // - length [2 bytes] 187 // length includes bytes for defining the length itself, bytes for defining the type, 188 // and the actual bytes of data without any padding. 189 // - type [2 bytes] 190 // - data [variable-sized] 191 // - padding [optional] 192 // 193 // Example. Counter{Name: "dummy-metric", Packets: 123, Bytes: 54321} in netlink message: 194 // 195 // struct nfgenmsg{ 196 // __u8 nfgen_family: AF_NETLINK 197 // __u8 version: nl.NFNETLINK_V0 198 // __be16 res_id: nl.NFNETLINK_V0 199 // } 200 // 201 // struct nlattr{ 202 // __u16 nla_len: 13 203 // __u16 nla_type: NFACCT_NAME 204 // char data: dummy-metric\0 205 // } 206 // 207 // (padding:) 208 // data: \0\0\0 209 // 210 // struct nlattr{ 211 // __u16 nla_len: 12 212 // __u16 nla_type: NFACCT_PKTS 213 // __u64: data: 123 214 // } 215 // 216 // struct nlattr{ 217 // __u16 nla_len: 12 218 // __u16 nla_type: NFACCT_BYTES 219 // __u64: data: 54321 220 // } 221 func decode(msg []byte, strict bool) (*Counter, error) { 222 counter := &Counter{} 223 reader := bytes.NewReader(msg) 224 // skip the first 4 bytes (netfilter generic message). 225 if _, err := reader.Seek(nl.SizeofNfgenmsg, io.SeekCurrent); err != nil { 226 return nil, err 227 } 228 229 // attrsProcessed tracks the number of processed attributes. 230 var attrsProcessed int 231 232 // length and type of netlink attribute. 233 var length, attrType uint16 234 235 // now we are just left with the attributes(struct nlattr) after skipping netlink generic 236 // message; we iterate over all the attributes one by one to construct our Counter object. 237 for reader.Len() > 0 { 238 // netlink attributes are in LTV(length, type and value) format. 239 240 // STEP 1. parse length [2 bytes] 241 if err := binary.Read(reader, binary.NativeEndian, &length); err != nil { 242 return nil, err 243 } 244 245 // STEP 2. parse type [2 bytes] 246 if err := binary.Read(reader, binary.NativeEndian, &attrType); err != nil { 247 return nil, err 248 } 249 250 // STEP 3. adjust the length 251 // adjust the length to consider the header bytes read in step(1) and step(2); the actual 252 // length of data will be 4 bytes less than the originally read value. 253 length -= 4 254 255 // STEP 4. parse value [variable sized] 256 // The value can assume any data-type. To read it into the appropriate data structure, we need 257 // to know the data type in advance. We achieve this by switching on the attribute-type, and we 258 // allocate the 'adjusted length' bytes (as done in step(3)) for the data-structure. 259 switch attrType { 260 case attrName: 261 // NFACCT_NAME has a variable size, so we allocate a slice of 'adjusted length' bytes 262 // and read the next 'adjusted length' bytes into this slice. 263 data := make([]byte, length) 264 if err := binary.Read(reader, binary.NativeEndian, data); err != nil { 265 return nil, err 266 } 267 counter.Name = string(data[:length-1]) 268 attrsProcessed++ 269 case attrPackets: 270 // NFACCT_PKTS holds 8 bytes of data, so we directly read the next 8 bytes into a 64-bit 271 // unsigned integer (counter.Packets). 272 if err := binary.Read(reader, binary.BigEndian, &counter.Packets); err != nil { 273 return nil, err 274 } 275 attrsProcessed++ 276 case attrBytes: 277 // NFACCT_BYTES holds 8 bytes of data, so we directly read the next 8 bytes into a 64-bit 278 // unsigned integer (counter.Bytes). 279 if err := binary.Read(reader, binary.BigEndian, &counter.Bytes); err != nil { 280 return nil, err 281 } 282 attrsProcessed++ 283 default: 284 // skip the data part for unknown attribute 285 if _, err := reader.Seek(int64(length), io.SeekCurrent); err != nil { 286 return nil, err 287 } 288 } 289 290 // Move past the padding to align with the fixed-size length, always a multiple of 4. 291 // If, for instance, the length is 9, skip 3 bytes of padding to reach the start of 292 // the next attribute. 293 // (ref: https://github.com/torvalds/linux/blob/v6.7/include/uapi/linux/netlink.h#L220-L227) 294 if length%4 != 0 { 295 padding := 4 - length%4 296 if _, err := reader.Seek(int64(padding), io.SeekCurrent); err != nil { 297 return nil, err 298 } 299 } 300 } 301 302 // return err if any of the required attribute is not processed. 303 if strict && attrsProcessed != 3 { 304 return nil, errors.New("failed to decode byte-stream") 305 } 306 return counter, nil 307 }