github.com/sagernet/netlink@v0.0.0-20240612041022-b9a21c07ac6a/conntrack_linux.go (about) 1 package netlink 2 3 import ( 4 "bytes" 5 "encoding/binary" 6 "errors" 7 "fmt" 8 "net" 9 "time" 10 11 "github.com/sagernet/netlink/nl" 12 "golang.org/x/sys/unix" 13 ) 14 15 // ConntrackTableType Conntrack table for the netlink operation 16 type ConntrackTableType uint8 17 18 const ( 19 // ConntrackTable Conntrack table 20 // https://github.com/torvalds/linux/blob/master/include/uapi/linux/netfilter/nfnetlink.h -> #define NFNL_SUBSYS_CTNETLINK 1 21 ConntrackTable = 1 22 // ConntrackExpectTable Conntrack expect table 23 // https://github.com/torvalds/linux/blob/master/include/uapi/linux/netfilter/nfnetlink.h -> #define NFNL_SUBSYS_CTNETLINK_EXP 2 24 ConntrackExpectTable = 2 25 ) 26 27 const ( 28 // backward compatibility with golang 1.6 which does not have io.SeekCurrent 29 seekCurrent = 1 30 ) 31 32 // InetFamily Family type 33 type InetFamily uint8 34 35 // -L [table] [options] List conntrack or expectation table 36 // -G [table] parameters Get conntrack or expectation 37 38 // -I [table] parameters Create a conntrack or expectation 39 // -U [table] parameters Update a conntrack 40 // -E [table] [options] Show events 41 42 // -C [table] Show counter 43 // -S Show statistics 44 45 // ConntrackTableList returns the flow list of a table of a specific family 46 // conntrack -L [table] [options] List conntrack or expectation table 47 func ConntrackTableList(table ConntrackTableType, family InetFamily) ([]*ConntrackFlow, error) { 48 return pkgHandle.ConntrackTableList(table, family) 49 } 50 51 // ConntrackTableFlush flushes all the flows of a specified table 52 // conntrack -F [table] Flush table 53 // The flush operation applies to all the family types 54 func ConntrackTableFlush(table ConntrackTableType) error { 55 return pkgHandle.ConntrackTableFlush(table) 56 } 57 58 // ConntrackDeleteFilter deletes entries on the specified table on the base of the filter 59 // conntrack -D [table] parameters Delete conntrack or expectation 60 func ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter CustomConntrackFilter) (uint, error) { 61 return pkgHandle.ConntrackDeleteFilter(table, family, filter) 62 } 63 64 // ConntrackTableList returns the flow list of a table of a specific family using the netlink handle passed 65 // conntrack -L [table] [options] List conntrack or expectation table 66 func (h *Handle) ConntrackTableList(table ConntrackTableType, family InetFamily) ([]*ConntrackFlow, error) { 67 res, err := h.dumpConntrackTable(table, family) 68 if err != nil { 69 return nil, err 70 } 71 72 // Deserialize all the flows 73 var result []*ConntrackFlow 74 for _, dataRaw := range res { 75 result = append(result, parseRawData(dataRaw)) 76 } 77 78 return result, nil 79 } 80 81 // ConntrackTableFlush flushes all the flows of a specified table using the netlink handle passed 82 // conntrack -F [table] Flush table 83 // The flush operation applies to all the family types 84 func (h *Handle) ConntrackTableFlush(table ConntrackTableType) error { 85 req := h.newConntrackRequest(table, unix.AF_INET, nl.IPCTNL_MSG_CT_DELETE, unix.NLM_F_ACK) 86 _, err := req.Execute(unix.NETLINK_NETFILTER, 0) 87 return err 88 } 89 90 // ConntrackDeleteFilter deletes entries on the specified table on the base of the filter using the netlink handle passed 91 // conntrack -D [table] parameters Delete conntrack or expectation 92 func (h *Handle) ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter CustomConntrackFilter) (uint, error) { 93 res, err := h.dumpConntrackTable(table, family) 94 if err != nil { 95 return 0, err 96 } 97 98 var matched uint 99 for _, dataRaw := range res { 100 flow := parseRawData(dataRaw) 101 if match := filter.MatchConntrackFlow(flow); match { 102 req2 := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_DELETE, unix.NLM_F_ACK) 103 // skip the first 4 byte that are the netfilter header, the newConntrackRequest is adding it already 104 req2.AddRawData(dataRaw[4:]) 105 req2.Execute(unix.NETLINK_NETFILTER, 0) 106 matched++ 107 } 108 } 109 110 return matched, nil 111 } 112 113 func (h *Handle) newConntrackRequest(table ConntrackTableType, family InetFamily, operation, flags int) *nl.NetlinkRequest { 114 // Create the Netlink request object 115 req := h.newNetlinkRequest((int(table)<<8)|operation, flags) 116 // Add the netfilter header 117 msg := &nl.Nfgenmsg{ 118 NfgenFamily: uint8(family), 119 Version: nl.NFNETLINK_V0, 120 ResId: 0, 121 } 122 req.AddData(msg) 123 return req 124 } 125 126 func (h *Handle) dumpConntrackTable(table ConntrackTableType, family InetFamily) ([][]byte, error) { 127 req := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_GET, unix.NLM_F_DUMP) 128 return req.Execute(unix.NETLINK_NETFILTER, 0) 129 } 130 131 // The full conntrack flow structure is very complicated and can be found in the file: 132 // http://git.netfilter.org/libnetfilter_conntrack/tree/include/internal/object.h 133 // For the time being, the structure below allows to parse and extract the base information of a flow 134 type ipTuple struct { 135 Bytes uint64 136 DstIP net.IP 137 DstPort uint16 138 Packets uint64 139 Protocol uint8 140 SrcIP net.IP 141 SrcPort uint16 142 } 143 144 type ConntrackFlow struct { 145 FamilyType uint8 146 Forward ipTuple 147 Reverse ipTuple 148 Mark uint32 149 TimeStart uint64 150 TimeStop uint64 151 TimeOut uint32 152 } 153 154 func (s *ConntrackFlow) String() string { 155 // conntrack cmd output: 156 // udp 17 src=127.0.0.1 dst=127.0.0.1 sport=4001 dport=1234 packets=5 bytes=532 [UNREPLIED] src=127.0.0.1 dst=127.0.0.1 sport=1234 dport=4001 packets=10 bytes=1078 mark=0 157 // start=2019-07-26 01:26:21.557800506 +0000 UTC stop=1970-01-01 00:00:00 +0000 UTC timeout=30(sec) 158 start := time.Unix(0, int64(s.TimeStart)) 159 stop := time.Unix(0, int64(s.TimeStop)) 160 timeout := int32(s.TimeOut) 161 return fmt.Sprintf("%s\t%d src=%s dst=%s sport=%d dport=%d packets=%d bytes=%d\tsrc=%s dst=%s sport=%d dport=%d packets=%d bytes=%d mark=0x%x start=%v stop=%v timeout=%d(sec)", 162 nl.L4ProtoMap[s.Forward.Protocol], s.Forward.Protocol, 163 s.Forward.SrcIP.String(), s.Forward.DstIP.String(), s.Forward.SrcPort, s.Forward.DstPort, s.Forward.Packets, s.Forward.Bytes, 164 s.Reverse.SrcIP.String(), s.Reverse.DstIP.String(), s.Reverse.SrcPort, s.Reverse.DstPort, s.Reverse.Packets, s.Reverse.Bytes, 165 s.Mark, start, stop, timeout) 166 } 167 168 // This method parse the ip tuple structure 169 // The message structure is the following: 170 // <len, [CTA_IP_V4_SRC|CTA_IP_V6_SRC], 16 bytes for the IP> 171 // <len, [CTA_IP_V4_DST|CTA_IP_V6_DST], 16 bytes for the IP> 172 // <len, NLA_F_NESTED|nl.CTA_TUPLE_PROTO, 1 byte for the protocol, 3 bytes of padding> 173 // <len, CTA_PROTO_SRC_PORT, 2 bytes for the source port, 2 bytes of padding> 174 // <len, CTA_PROTO_DST_PORT, 2 bytes for the source port, 2 bytes of padding> 175 func parseIpTuple(reader *bytes.Reader, tpl *ipTuple) uint8 { 176 for i := 0; i < 2; i++ { 177 _, t, _, v := parseNfAttrTLV(reader) 178 switch t { 179 case nl.CTA_IP_V4_SRC, nl.CTA_IP_V6_SRC: 180 tpl.SrcIP = v 181 case nl.CTA_IP_V4_DST, nl.CTA_IP_V6_DST: 182 tpl.DstIP = v 183 } 184 } 185 // Get total length of nested protocol-specific info. 186 _, _, protoInfoTotalLen := parseNfAttrTL(reader) 187 _, t, l, v := parseNfAttrTLV(reader) 188 // Track the number of bytes read. 189 protoInfoBytesRead := uint16(nl.SizeofNfattr) + l 190 if t == nl.CTA_PROTO_NUM { 191 tpl.Protocol = uint8(v[0]) 192 } 193 // We only parse TCP & UDP headers. Skip the others. 194 if tpl.Protocol != 6 && tpl.Protocol != 17 { 195 // skip the rest 196 bytesRemaining := protoInfoTotalLen - protoInfoBytesRead 197 reader.Seek(int64(bytesRemaining), seekCurrent) 198 return tpl.Protocol 199 } 200 // Skip 3 bytes of padding 201 reader.Seek(3, seekCurrent) 202 protoInfoBytesRead += 3 203 for i := 0; i < 2; i++ { 204 _, t, _ := parseNfAttrTL(reader) 205 protoInfoBytesRead += uint16(nl.SizeofNfattr) 206 switch t { 207 case nl.CTA_PROTO_SRC_PORT: 208 parseBERaw16(reader, &tpl.SrcPort) 209 protoInfoBytesRead += 2 210 case nl.CTA_PROTO_DST_PORT: 211 parseBERaw16(reader, &tpl.DstPort) 212 protoInfoBytesRead += 2 213 } 214 // Skip 2 bytes of padding 215 reader.Seek(2, seekCurrent) 216 protoInfoBytesRead += 2 217 } 218 // Skip any remaining/unknown parts of the message 219 bytesRemaining := protoInfoTotalLen - protoInfoBytesRead 220 reader.Seek(int64(bytesRemaining), seekCurrent) 221 222 return tpl.Protocol 223 } 224 225 func parseNfAttrTLV(r *bytes.Reader) (isNested bool, attrType, len uint16, value []byte) { 226 isNested, attrType, len = parseNfAttrTL(r) 227 228 value = make([]byte, len) 229 binary.Read(r, binary.BigEndian, &value) 230 return isNested, attrType, len, value 231 } 232 233 func parseNfAttrTL(r *bytes.Reader) (isNested bool, attrType, len uint16) { 234 binary.Read(r, nl.NativeEndian(), &len) 235 len -= nl.SizeofNfattr 236 237 binary.Read(r, nl.NativeEndian(), &attrType) 238 isNested = (attrType & nl.NLA_F_NESTED) == nl.NLA_F_NESTED 239 attrType = attrType & (nl.NLA_F_NESTED - 1) 240 return isNested, attrType, len 241 } 242 243 func skipNfAttrValue(r *bytes.Reader, len uint16) { 244 len = (len + nl.NLA_ALIGNTO - 1) & ^(nl.NLA_ALIGNTO - 1) 245 r.Seek(int64(len), seekCurrent) 246 } 247 248 func parseBERaw16(r *bytes.Reader, v *uint16) { 249 binary.Read(r, binary.BigEndian, v) 250 } 251 252 func parseBERaw32(r *bytes.Reader, v *uint32) { 253 binary.Read(r, binary.BigEndian, v) 254 } 255 256 func parseBERaw64(r *bytes.Reader, v *uint64) { 257 binary.Read(r, binary.BigEndian, v) 258 } 259 260 func parseByteAndPacketCounters(r *bytes.Reader) (bytes, packets uint64) { 261 for i := 0; i < 2; i++ { 262 switch _, t, _ := parseNfAttrTL(r); t { 263 case nl.CTA_COUNTERS_BYTES: 264 parseBERaw64(r, &bytes) 265 case nl.CTA_COUNTERS_PACKETS: 266 parseBERaw64(r, &packets) 267 default: 268 return 269 } 270 } 271 return 272 } 273 274 // when the flow is alive, only the timestamp_start is returned in structure 275 func parseTimeStamp(r *bytes.Reader, readSize uint16) (tstart, tstop uint64) { 276 var numTimeStamps int 277 oneItem := nl.SizeofNfattr + 8 // 4 bytes attr header + 8 bytes timestamp 278 if readSize == uint16(oneItem) { 279 numTimeStamps = 1 280 } else if readSize == 2*uint16(oneItem) { 281 numTimeStamps = 2 282 } else { 283 return 284 } 285 for i := 0; i < numTimeStamps; i++ { 286 switch _, t, _ := parseNfAttrTL(r); t { 287 case nl.CTA_TIMESTAMP_START: 288 parseBERaw64(r, &tstart) 289 case nl.CTA_TIMESTAMP_STOP: 290 parseBERaw64(r, &tstop) 291 default: 292 return 293 } 294 } 295 return 296 297 } 298 299 func parseTimeOut(r *bytes.Reader) (ttimeout uint32) { 300 parseBERaw32(r, &ttimeout) 301 return 302 } 303 304 func parseConnectionMark(r *bytes.Reader) (mark uint32) { 305 parseBERaw32(r, &mark) 306 return 307 } 308 309 func parseRawData(data []byte) *ConntrackFlow { 310 s := &ConntrackFlow{} 311 // First there is the Nfgenmsg header 312 // consume only the family field 313 reader := bytes.NewReader(data) 314 binary.Read(reader, nl.NativeEndian(), &s.FamilyType) 315 316 // skip rest of the Netfilter header 317 reader.Seek(3, seekCurrent) 318 // The message structure is the following: 319 // <len, NLA_F_NESTED|CTA_TUPLE_ORIG> 4 bytes 320 // <len, NLA_F_NESTED|CTA_TUPLE_IP> 4 bytes 321 // flow information of the forward flow 322 // <len, NLA_F_NESTED|CTA_TUPLE_REPLY> 4 bytes 323 // <len, NLA_F_NESTED|CTA_TUPLE_IP> 4 bytes 324 // flow information of the reverse flow 325 for reader.Len() > 0 { 326 if nested, t, l := parseNfAttrTL(reader); nested { 327 switch t { 328 case nl.CTA_TUPLE_ORIG: 329 if nested, t, l = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP { 330 parseIpTuple(reader, &s.Forward) 331 } 332 case nl.CTA_TUPLE_REPLY: 333 if nested, t, l = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP { 334 parseIpTuple(reader, &s.Reverse) 335 } else { 336 // Header not recognized skip it 337 skipNfAttrValue(reader, l) 338 } 339 case nl.CTA_COUNTERS_ORIG: 340 s.Forward.Bytes, s.Forward.Packets = parseByteAndPacketCounters(reader) 341 case nl.CTA_COUNTERS_REPLY: 342 s.Reverse.Bytes, s.Reverse.Packets = parseByteAndPacketCounters(reader) 343 case nl.CTA_TIMESTAMP: 344 s.TimeStart, s.TimeStop = parseTimeStamp(reader, l) 345 case nl.CTA_PROTOINFO: 346 skipNfAttrValue(reader, l) 347 default: 348 skipNfAttrValue(reader, l) 349 } 350 } else { 351 switch t { 352 case nl.CTA_MARK: 353 s.Mark = parseConnectionMark(reader) 354 case nl.CTA_TIMEOUT: 355 s.TimeOut = parseTimeOut(reader) 356 case nl.CTA_STATUS, nl.CTA_USE, nl.CTA_ID: 357 skipNfAttrValue(reader, l) 358 default: 359 skipNfAttrValue(reader, l) 360 } 361 } 362 } 363 return s 364 } 365 366 // Conntrack parameters and options: 367 // -n, --src-nat ip source NAT ip 368 // -g, --dst-nat ip destination NAT ip 369 // -j, --any-nat ip source or destination NAT ip 370 // -m, --mark mark Set mark 371 // -c, --secmark secmark Set selinux secmark 372 // -e, --event-mask eventmask Event mask, eg. NEW,DESTROY 373 // -z, --zero Zero counters while listing 374 // -o, --output type[,...] Output format, eg. xml 375 // -l, --label label[,...] conntrack labels 376 377 // Common parameters and options: 378 // -s, --src, --orig-src ip Source address from original direction 379 // -d, --dst, --orig-dst ip Destination address from original direction 380 // -r, --reply-src ip Source address from reply direction 381 // -q, --reply-dst ip Destination address from reply direction 382 // -p, --protonum proto Layer 4 Protocol, eg. 'tcp' 383 // -f, --family proto Layer 3 Protocol, eg. 'ipv6' 384 // -t, --timeout timeout Set timeout 385 // -u, --status status Set status, eg. ASSURED 386 // -w, --zone value Set conntrack zone 387 // --orig-zone value Set zone for original direction 388 // --reply-zone value Set zone for reply direction 389 // -b, --buffer-size Netlink socket buffer size 390 // --mask-src ip Source mask address 391 // --mask-dst ip Destination mask address 392 393 // Layer 4 Protocol common parameters and options: 394 // TCP, UDP, SCTP, UDPLite and DCCP 395 // --sport, --orig-port-src port Source port in original direction 396 // --dport, --orig-port-dst port Destination port in original direction 397 398 // Filter types 399 type ConntrackFilterType uint8 400 401 const ( 402 ConntrackOrigSrcIP = iota // -orig-src ip Source address from original direction 403 ConntrackOrigDstIP // -orig-dst ip Destination address from original direction 404 ConntrackReplySrcIP // --reply-src ip Reply Source IP 405 ConntrackReplyDstIP // --reply-dst ip Reply Destination IP 406 ConntrackReplyAnyIP // Match source or destination reply IP 407 ConntrackOrigSrcPort // --orig-port-src port Source port in original direction 408 ConntrackOrigDstPort // --orig-port-dst port Destination port in original direction 409 ConntrackNatSrcIP = ConntrackReplySrcIP // deprecated use instead ConntrackReplySrcIP 410 ConntrackNatDstIP = ConntrackReplyDstIP // deprecated use instead ConntrackReplyDstIP 411 ConntrackNatAnyIP = ConntrackReplyAnyIP // deprecated use instead ConntrackReplyAnyIP 412 ) 413 414 type CustomConntrackFilter interface { 415 // MatchConntrackFlow applies the filter to the flow and returns true if the flow matches 416 // the filter or false otherwise 417 MatchConntrackFlow(flow *ConntrackFlow) bool 418 } 419 420 type ConntrackFilter struct { 421 ipNetFilter map[ConntrackFilterType]*net.IPNet 422 portFilter map[ConntrackFilterType]uint16 423 protoFilter uint8 424 } 425 426 // AddIPNet adds a IP subnet to the conntrack filter 427 func (f *ConntrackFilter) AddIPNet(tp ConntrackFilterType, ipNet *net.IPNet) error { 428 if ipNet == nil { 429 return fmt.Errorf("Filter attribute empty") 430 } 431 if f.ipNetFilter == nil { 432 f.ipNetFilter = make(map[ConntrackFilterType]*net.IPNet) 433 } 434 if _, ok := f.ipNetFilter[tp]; ok { 435 return errors.New("Filter attribute already present") 436 } 437 f.ipNetFilter[tp] = ipNet 438 return nil 439 } 440 441 // AddIP adds an IP to the conntrack filter 442 func (f *ConntrackFilter) AddIP(tp ConntrackFilterType, ip net.IP) error { 443 if ip == nil { 444 return fmt.Errorf("Filter attribute empty") 445 } 446 return f.AddIPNet(tp, NewIPNet(ip)) 447 } 448 449 // AddPort adds a Port to the conntrack filter if the Layer 4 protocol allows it 450 func (f *ConntrackFilter) AddPort(tp ConntrackFilterType, port uint16) error { 451 switch f.protoFilter { 452 // TCP, UDP, DCCP, SCTP, UDPLite 453 case 6, 17, 33, 132, 136: 454 default: 455 return fmt.Errorf("Filter attribute not available without a valid Layer 4 protocol: %d", f.protoFilter) 456 } 457 458 if f.portFilter == nil { 459 f.portFilter = make(map[ConntrackFilterType]uint16) 460 } 461 if _, ok := f.portFilter[tp]; ok { 462 return errors.New("Filter attribute already present") 463 } 464 f.portFilter[tp] = port 465 return nil 466 } 467 468 // AddProtocol adds the Layer 4 protocol to the conntrack filter 469 func (f *ConntrackFilter) AddProtocol(proto uint8) error { 470 if f.protoFilter != 0 { 471 return errors.New("Filter attribute already present") 472 } 473 f.protoFilter = proto 474 return nil 475 } 476 477 // MatchConntrackFlow applies the filter to the flow and returns true if the flow matches the filter 478 // false otherwise 479 func (f *ConntrackFilter) MatchConntrackFlow(flow *ConntrackFlow) bool { 480 if len(f.ipNetFilter) == 0 && len(f.portFilter) == 0 && f.protoFilter == 0 { 481 // empty filter always not match 482 return false 483 } 484 485 // -p, --protonum proto Layer 4 Protocol, eg. 'tcp' 486 if f.protoFilter != 0 && flow.Forward.Protocol != f.protoFilter { 487 // different Layer 4 protocol always not match 488 return false 489 } 490 491 match := true 492 493 // IP conntrack filter 494 if len(f.ipNetFilter) > 0 { 495 // -orig-src ip Source address from original direction 496 if elem, found := f.ipNetFilter[ConntrackOrigSrcIP]; found { 497 match = match && elem.Contains(flow.Forward.SrcIP) 498 } 499 500 // -orig-dst ip Destination address from original direction 501 if elem, found := f.ipNetFilter[ConntrackOrigDstIP]; match && found { 502 match = match && elem.Contains(flow.Forward.DstIP) 503 } 504 505 // -src-nat ip Source NAT ip 506 if elem, found := f.ipNetFilter[ConntrackReplySrcIP]; match && found { 507 match = match && elem.Contains(flow.Reverse.SrcIP) 508 } 509 510 // -dst-nat ip Destination NAT ip 511 if elem, found := f.ipNetFilter[ConntrackReplyDstIP]; match && found { 512 match = match && elem.Contains(flow.Reverse.DstIP) 513 } 514 515 // Match source or destination reply IP 516 if elem, found := f.ipNetFilter[ConntrackReplyAnyIP]; match && found { 517 match = match && (elem.Contains(flow.Reverse.SrcIP) || elem.Contains(flow.Reverse.DstIP)) 518 } 519 } 520 521 // Layer 4 Port filter 522 if len(f.portFilter) > 0 { 523 // -orig-port-src port Source port from original direction 524 if elem, found := f.portFilter[ConntrackOrigSrcPort]; match && found { 525 match = match && elem == flow.Forward.SrcPort 526 } 527 528 // -orig-port-dst port Destination port from original direction 529 if elem, found := f.portFilter[ConntrackOrigDstPort]; match && found { 530 match = match && elem == flow.Forward.DstPort 531 } 532 } 533 534 return match 535 } 536 537 var _ CustomConntrackFilter = (*ConntrackFilter)(nil)