github.com/sagernet/quic-go@v0.43.1-beta.1/sys_conn_oob.go (about) 1 //go:build darwin || linux || freebsd 2 3 package quic 4 5 import ( 6 "encoding/binary" 7 "errors" 8 "log" 9 "net" 10 "net/netip" 11 "os" 12 "strconv" 13 "sync" 14 "syscall" 15 "time" 16 "unsafe" 17 18 "github.com/sagernet/quic-go/internal/protocol" 19 "github.com/sagernet/quic-go/internal/utils" 20 "golang.org/x/net/ipv4" 21 "golang.org/x/net/ipv6" 22 "golang.org/x/sys/unix" 23 ) 24 25 const ( 26 ecnMask = 0x3 27 oobBufferSize = 128 28 ) 29 30 // Contrary to what the naming suggests, the ipv{4,6}.Message is not dependent on the IP version. 31 // They're both just aliases for x/net/internal/socket.Message. 32 // This means we can use this struct to read from a socket that receives both IPv4 and IPv6 messages. 33 var _ ipv4.Message = ipv6.Message{} 34 35 type batchConn interface { 36 ReadBatch(ms []ipv4.Message, flags int) (int, error) 37 } 38 39 func inspectReadBuffer(c syscall.RawConn) (int, error) { 40 var size int 41 var serr error 42 if err := c.Control(func(fd uintptr) { 43 size, serr = unix.GetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_RCVBUF) 44 }); err != nil { 45 return 0, err 46 } 47 return size, serr 48 } 49 50 func inspectWriteBuffer(c syscall.RawConn) (int, error) { 51 var size int 52 var serr error 53 if err := c.Control(func(fd uintptr) { 54 size, serr = unix.GetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_SNDBUF) 55 }); err != nil { 56 return 0, err 57 } 58 return size, serr 59 } 60 61 func isECNDisabledUsingEnv() bool { 62 disabled, err := strconv.ParseBool(os.Getenv("QUIC_GO_DISABLE_ECN")) 63 return err == nil && disabled 64 } 65 66 type oobConn struct { 67 OOBCapablePacketConn 68 batchConn batchConn 69 70 readPos uint8 71 // Packets received from the kernel, but not yet returned by ReadPacket(). 72 messages []ipv4.Message 73 buffers [batchSize]*PacketBuffer 74 75 cap connCapabilities 76 } 77 78 var _ rawConn = &oobConn{} 79 80 func newConn(c OOBCapablePacketConn, supportsDF bool) (*oobConn, error) { 81 rawConn, err := c.SyscallConn() 82 if err != nil { 83 return nil, err 84 } 85 needsPacketInfo := false 86 if udpAddr, ok := c.LocalAddr().(*net.UDPAddr); ok && udpAddr.IP.IsUnspecified() { 87 needsPacketInfo = true 88 } 89 // We don't know if this a IPv4-only, IPv6-only or a IPv4-and-IPv6 connection. 90 // Try enabling receiving of ECN and packet info for both IP versions. 91 // We expect at least one of those syscalls to succeed. 92 var errECNIPv4, errECNIPv6, errPIIPv4, errPIIPv6 error 93 if err := rawConn.Control(func(fd uintptr) { 94 errECNIPv4 = unix.SetsockoptInt(int(fd), unix.IPPROTO_IP, unix.IP_RECVTOS, 1) 95 errECNIPv6 = unix.SetsockoptInt(int(fd), unix.IPPROTO_IPV6, unix.IPV6_RECVTCLASS, 1) 96 97 if needsPacketInfo { 98 errPIIPv4 = unix.SetsockoptInt(int(fd), unix.IPPROTO_IP, ipv4PKTINFO, 1) 99 errPIIPv6 = unix.SetsockoptInt(int(fd), unix.IPPROTO_IPV6, unix.IPV6_RECVPKTINFO, 1) 100 } 101 }); err != nil { 102 return nil, err 103 } 104 switch { 105 case errECNIPv4 == nil && errECNIPv6 == nil: 106 utils.DefaultLogger.Debugf("Activating reading of ECN bits for IPv4 and IPv6.") 107 case errECNIPv4 == nil && errECNIPv6 != nil: 108 utils.DefaultLogger.Debugf("Activating reading of ECN bits for IPv4.") 109 case errECNIPv4 != nil && errECNIPv6 == nil: 110 utils.DefaultLogger.Debugf("Activating reading of ECN bits for IPv6.") 111 case errECNIPv4 != nil && errECNIPv6 != nil: 112 return nil, errors.New("activating ECN failed for both IPv4 and IPv6") 113 } 114 if needsPacketInfo { 115 switch { 116 case errPIIPv4 == nil && errPIIPv6 == nil: 117 utils.DefaultLogger.Debugf("Activating reading of packet info for IPv4 and IPv6.") 118 case errPIIPv4 == nil && errPIIPv6 != nil: 119 utils.DefaultLogger.Debugf("Activating reading of packet info bits for IPv4.") 120 case errPIIPv4 != nil && errPIIPv6 == nil: 121 utils.DefaultLogger.Debugf("Activating reading of packet info bits for IPv6.") 122 case errPIIPv4 != nil && errPIIPv6 != nil: 123 return nil, errors.New("activating packet info failed for both IPv4 and IPv6") 124 } 125 } 126 127 // Allows callers to pass in a connection that already satisfies batchConn interface 128 // to make use of the optimisation. Otherwise, ipv4.NewPacketConn would unwrap the file descriptor 129 // via SyscallConn(), and read it that way, which might not be what the caller wants. 130 var bc batchConn 131 if ibc, ok := c.(batchConn); ok { 132 bc = ibc 133 } else { 134 bc = ipv4.NewPacketConn(c) 135 } 136 137 msgs := make([]ipv4.Message, batchSize) 138 for i := range msgs { 139 // preallocate the [][]byte 140 msgs[i].Buffers = make([][]byte, 1) 141 } 142 oobConn := &oobConn{ 143 OOBCapablePacketConn: c, 144 batchConn: bc, 145 messages: msgs, 146 readPos: batchSize, 147 cap: connCapabilities{ 148 DF: supportsDF, 149 GSO: isGSOEnabled(rawConn), 150 ECN: isECNEnabled(), 151 }, 152 } 153 for i := 0; i < batchSize; i++ { 154 oobConn.messages[i].OOB = make([]byte, oobBufferSize) 155 } 156 return oobConn, nil 157 } 158 159 var invalidCmsgOnceV4, invalidCmsgOnceV6 sync.Once 160 161 func (c *oobConn) ReadPacket() (receivedPacket, error) { 162 if len(c.messages) == int(c.readPos) { // all messages read. Read the next batch of messages. 163 c.messages = c.messages[:batchSize] 164 // replace buffers data buffers up to the packet that has been consumed during the last ReadBatch call 165 for i := uint8(0); i < c.readPos; i++ { 166 buffer := GetPacketBuffer() 167 buffer.Data = buffer.Data[:protocol.MaxPacketBufferSize] 168 c.buffers[i] = buffer 169 c.messages[i].Buffers[0] = c.buffers[i].Data 170 } 171 c.readPos = 0 172 173 n, err := c.batchConn.ReadBatch(c.messages, 0) 174 if n == 0 || err != nil { 175 return receivedPacket{}, err 176 } 177 c.messages = c.messages[:n] 178 } 179 180 msg := c.messages[c.readPos] 181 buffer := c.buffers[c.readPos] 182 c.readPos++ 183 184 data := msg.OOB[:msg.NN] 185 p := receivedPacket{ 186 remoteAddr: msg.Addr, 187 rcvTime: time.Now(), 188 data: msg.Buffers[0][:msg.N], 189 buffer: buffer, 190 } 191 for len(data) > 0 { 192 hdr, body, remainder, err := unix.ParseOneSocketControlMessage(data) 193 if err != nil { 194 return receivedPacket{}, err 195 } 196 if hdr.Level == unix.IPPROTO_IP { 197 switch hdr.Type { 198 case msgTypeIPTOS: 199 p.ecn = protocol.ParseECNHeaderBits(body[0] & ecnMask) 200 case ipv4PKTINFO: 201 ip, ifIndex, ok := parseIPv4PktInfo(body) 202 if ok { 203 p.info.addr = ip 204 p.info.ifIndex = ifIndex 205 } else { 206 invalidCmsgOnceV4.Do(func() { 207 log.Printf("Received invalid IPv4 packet info control message: %+x. "+ 208 "This should never occur, please open a new issue and include details about the architecture.", body) 209 }) 210 } 211 } 212 } 213 if hdr.Level == unix.IPPROTO_IPV6 { 214 switch hdr.Type { 215 case unix.IPV6_TCLASS: 216 p.ecn = protocol.ParseECNHeaderBits(body[0] & ecnMask) 217 case unix.IPV6_PKTINFO: 218 // struct in6_pktinfo { 219 // struct in6_addr ipi6_addr; /* src/dst IPv6 address */ 220 // unsigned int ipi6_ifindex; /* send/recv interface index */ 221 // }; 222 if len(body) == 20 { 223 p.info.addr = netip.AddrFrom16(*(*[16]byte)(body[:16])).Unmap() 224 p.info.ifIndex = binary.LittleEndian.Uint32(body[16:]) 225 } else { 226 invalidCmsgOnceV6.Do(func() { 227 log.Printf("Received invalid IPv6 packet info control message: %+x. "+ 228 "This should never occur, please open a new issue and include details about the architecture.", body) 229 }) 230 } 231 } 232 } 233 data = remainder 234 } 235 return p, nil 236 } 237 238 // WritePacket writes a new packet. 239 func (c *oobConn) WritePacket(b []byte, addr net.Addr, packetInfoOOB []byte, gsoSize uint16, ecn protocol.ECN) (int, error) { 240 oob := packetInfoOOB 241 if gsoSize > 0 { 242 if !c.capabilities().GSO { 243 panic("GSO disabled") 244 } 245 oob = appendUDPSegmentSizeMsg(oob, gsoSize) 246 } 247 if ecn != protocol.ECNUnsupported { 248 if !c.capabilities().ECN { 249 panic("tried to send an ECN-marked packet although ECN is disabled") 250 } 251 if remoteUDPAddr, ok := addr.(*net.UDPAddr); ok { 252 if remoteUDPAddr.IP.To4() != nil { 253 oob = appendIPv4ECNMsg(oob, ecn) 254 } else { 255 oob = appendIPv6ECNMsg(oob, ecn) 256 } 257 } 258 } 259 n, _, err := c.OOBCapablePacketConn.WriteMsgUDP(b, oob, addr.(*net.UDPAddr)) 260 return n, err 261 } 262 263 func (c *oobConn) capabilities() connCapabilities { 264 return c.cap 265 } 266 267 type packetInfo struct { 268 addr netip.Addr 269 ifIndex uint32 270 } 271 272 func (info *packetInfo) OOB() []byte { 273 if info == nil { 274 return nil 275 } 276 if info.addr.Is4() { 277 ip := info.addr.As4() 278 // struct in_pktinfo { 279 // unsigned int ipi_ifindex; /* Interface index */ 280 // struct in_addr ipi_spec_dst; /* Local address */ 281 // struct in_addr ipi_addr; /* Header Destination address */ 282 // }; 283 cm := ipv4.ControlMessage{ 284 Src: ip[:], 285 IfIndex: int(info.ifIndex), 286 } 287 return cm.Marshal() 288 } else if info.addr.Is6() { 289 ip := info.addr.As16() 290 // struct in6_pktinfo { 291 // struct in6_addr ipi6_addr; /* src/dst IPv6 address */ 292 // unsigned int ipi6_ifindex; /* send/recv interface index */ 293 // }; 294 cm := ipv6.ControlMessage{ 295 Src: ip[:], 296 IfIndex: int(info.ifIndex), 297 } 298 return cm.Marshal() 299 } 300 return nil 301 } 302 303 func appendIPv4ECNMsg(b []byte, val protocol.ECN) []byte { 304 startLen := len(b) 305 b = append(b, make([]byte, unix.CmsgSpace(ecnIPv4DataLen))...) 306 h := (*unix.Cmsghdr)(unsafe.Pointer(&b[startLen])) 307 h.Level = syscall.IPPROTO_IP 308 h.Type = unix.IP_TOS 309 h.SetLen(unix.CmsgLen(ecnIPv4DataLen)) 310 311 // UnixRights uses the private `data` method, but I *think* this achieves the same goal. 312 offset := startLen + unix.CmsgSpace(0) 313 b[offset] = val.ToHeaderBits() 314 return b 315 } 316 317 func appendIPv6ECNMsg(b []byte, val protocol.ECN) []byte { 318 startLen := len(b) 319 const dataLen = 4 320 b = append(b, make([]byte, unix.CmsgSpace(dataLen))...) 321 h := (*unix.Cmsghdr)(unsafe.Pointer(&b[startLen])) 322 h.Level = syscall.IPPROTO_IPV6 323 h.Type = unix.IPV6_TCLASS 324 h.SetLen(unix.CmsgLen(dataLen)) 325 326 // UnixRights uses the private `data` method, but I *think* this achieves the same goal. 327 offset := startLen + unix.CmsgSpace(0) 328 b[offset] = val.ToHeaderBits() 329 return b 330 }