github.com/sagernet/gvisor@v0.0.0-20240428053021-e691de28565f/pkg/unet/unet_unsafe.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package unet 16 17 import ( 18 "io" 19 "unsafe" 20 21 "golang.org/x/sys/unix" 22 ) 23 24 // wait blocks until the socket FD is ready for reading or writing, depending 25 // on the value of write. 26 // 27 // Returns errClosing if the Socket is in the process of closing. 28 func (s *Socket) wait(write bool) error { 29 for { 30 // Checking the FD on each loop is not strictly necessary, it 31 // just avoids an extra poll call. 32 fd := s.fd.Load() 33 if fd < 0 { 34 return errClosing 35 } 36 37 events := []unix.PollFd{ 38 { 39 // The actual socket FD. 40 Fd: fd, 41 Events: unix.POLLIN, 42 }, 43 { 44 // The eventfd, signaled when we are closing. 45 Fd: int32(s.efd.FD()), 46 Events: unix.POLLIN, 47 }, 48 } 49 if write { 50 events[0].Events = unix.POLLOUT 51 } 52 53 _, _, e := unix.Syscall6(unix.SYS_PPOLL, uintptr(unsafe.Pointer(&events[0])), 2, 0, 0, 0, 0) 54 if e == unix.EINTR { 55 continue 56 } 57 if e != 0 { 58 return e 59 } 60 61 if events[1].Revents&unix.POLLIN == unix.POLLIN { 62 // eventfd signaled, we're closing. 63 return errClosing 64 } 65 66 return nil 67 } 68 } 69 70 // buildIovec builds an iovec slice from the given []byte slice. 71 // 72 // iovecs is used as an initial slice, to avoid excessive allocations. 73 func buildIovec(bufs [][]byte, iovecs []unix.Iovec) ([]unix.Iovec, int) { 74 var length int 75 for i := range bufs { 76 if l := len(bufs[i]); l > 0 { 77 iovecs = append(iovecs, unix.Iovec{ 78 Base: &bufs[i][0], 79 Len: uint64(l), 80 }) 81 length += l 82 } 83 } 84 return iovecs, length 85 } 86 87 // ReadVec reads into the pre-allocated bufs. Returns bytes read. 88 // 89 // The pre-allocatted space used by ReadVec is based upon slice lengths. 90 // 91 // This function is not guaranteed to read all available data, it 92 // returns as soon as a single recvmsg call succeeds. 93 func (r *SocketReader) ReadVec(bufs [][]byte) (int, error) { 94 iovecs, length := buildIovec(bufs, make([]unix.Iovec, 0, 2)) 95 96 var msg unix.Msghdr 97 if len(r.source) != 0 { 98 msg.Name = &r.source[0] 99 msg.Namelen = uint32(len(r.source)) 100 } 101 102 if len(r.ControlMessage) != 0 { 103 msg.Control = &r.ControlMessage[0] 104 msg.Controllen = uint64(len(r.ControlMessage)) 105 } 106 107 if len(iovecs) != 0 { 108 msg.Iov = &iovecs[0] 109 msg.Iovlen = uint64(len(iovecs)) 110 } 111 112 // n is the bytes received. 113 var n uintptr 114 115 fd, ok := r.socket.enterFD() 116 if !ok { 117 return 0, unix.EBADF 118 } 119 // Leave on returns below. 120 for { 121 var e unix.Errno 122 123 // Try a non-blocking recv first, so we don't give up the go runtime M. 124 n, _, e = unix.RawSyscall(unix.SYS_RECVMSG, uintptr(fd), uintptr(unsafe.Pointer(&msg)), unix.MSG_DONTWAIT|unix.MSG_TRUNC) 125 if e == 0 { 126 break 127 } 128 if e == unix.EINTR { 129 continue 130 } 131 if !r.blocking { 132 r.socket.gate.Leave() 133 return 0, e 134 } 135 if e != unix.EAGAIN && e != unix.EWOULDBLOCK { 136 r.socket.gate.Leave() 137 return 0, e 138 } 139 140 // Wait for the socket to become readable. 141 err := r.socket.wait(false) 142 if err == errClosing { 143 err = unix.EBADF 144 } 145 if err != nil { 146 r.socket.gate.Leave() 147 return 0, err 148 } 149 } 150 151 r.socket.gate.Leave() 152 153 if msg.Controllen < uint64(len(r.ControlMessage)) { 154 r.ControlMessage = r.ControlMessage[:msg.Controllen] 155 } 156 157 if msg.Namelen < uint32(len(r.source)) { 158 r.source = r.source[:msg.Namelen] 159 } 160 161 // All unet sockets are SOCK_STREAM or SOCK_SEQPACKET, both of which 162 // indicate that the other end is closed by returning a 0 length read 163 // with no error. 164 if n == 0 { 165 return 0, io.EOF 166 } 167 168 if r.race != nil { 169 // See comments on Socket.race. 170 r.race.Add(1) 171 } 172 173 if int(n) > length { 174 return length, errMessageTruncated 175 } 176 177 return int(n), nil 178 } 179 180 // WriteVec writes the bufs to the socket. Returns bytes written. 181 // 182 // This function is not guaranteed to send all data, it returns 183 // as soon as a single sendmsg call succeeds. 184 func (w *SocketWriter) WriteVec(bufs [][]byte) (int, error) { 185 iovecs, _ := buildIovec(bufs, make([]unix.Iovec, 0, 2)) 186 187 if w.race != nil { 188 // See comments on Socket.race. 189 w.race.Add(1) 190 } 191 192 var msg unix.Msghdr 193 if len(w.to) != 0 { 194 msg.Name = &w.to[0] 195 msg.Namelen = uint32(len(w.to)) 196 } 197 198 if len(w.ControlMessage) != 0 { 199 msg.Control = &w.ControlMessage[0] 200 msg.Controllen = uint64(len(w.ControlMessage)) 201 } 202 203 if len(iovecs) > 0 { 204 msg.Iov = &iovecs[0] 205 msg.Iovlen = uint64(len(iovecs)) 206 } 207 208 fd, ok := w.socket.enterFD() 209 if !ok { 210 return 0, unix.EBADF 211 } 212 // Leave on returns below. 213 for { 214 // Try a non-blocking send first, so we don't give up the go runtime M. 215 n, _, e := unix.RawSyscall(unix.SYS_SENDMSG, uintptr(fd), uintptr(unsafe.Pointer(&msg)), unix.MSG_DONTWAIT|unix.MSG_NOSIGNAL) 216 if e == 0 { 217 w.socket.gate.Leave() 218 return int(n), nil 219 } 220 if e == unix.EINTR { 221 continue 222 } 223 if !w.blocking { 224 w.socket.gate.Leave() 225 return 0, e 226 } 227 if e != unix.EAGAIN && e != unix.EWOULDBLOCK { 228 w.socket.gate.Leave() 229 return 0, e 230 } 231 232 // Wait for the socket to become writeable. 233 err := w.socket.wait(true) 234 if err == errClosing { 235 err = unix.EBADF 236 } 237 if err != nil { 238 w.socket.gate.Leave() 239 return 0, err 240 } 241 } 242 // Unreachable, no s.gate.Leave needed. 243 } 244 245 // getsockopt issues a getsockopt unix. 246 func getsockopt(fd int, level int, optname int, buf []byte) (uint32, error) { 247 l := uint32(len(buf)) 248 _, _, e := unix.RawSyscall6(unix.SYS_GETSOCKOPT, uintptr(fd), uintptr(level), uintptr(optname), uintptr(unsafe.Pointer(&buf[0])), uintptr(unsafe.Pointer(&l)), 0) 249 if e != 0 { 250 return 0, e 251 } 252 253 return l, nil 254 } 255 256 // setsockopt issues a setsockopt unix. 257 func setsockopt(fd int, level int, optname int, buf []byte) error { 258 _, _, e := unix.RawSyscall6(unix.SYS_SETSOCKOPT, uintptr(fd), uintptr(level), uintptr(optname), uintptr(unsafe.Pointer(&buf[0])), uintptr(len(buf)), 0) 259 if e != 0 { 260 return e 261 } 262 263 return nil 264 } 265 266 // getsockname issues a getsockname unix. 267 func getsockname(fd int, buf []byte) (uint32, error) { 268 l := uint32(len(buf)) 269 _, _, e := unix.RawSyscall(unix.SYS_GETSOCKNAME, uintptr(fd), uintptr(unsafe.Pointer(&buf[0])), uintptr(unsafe.Pointer(&l))) 270 if e != 0 { 271 return 0, e 272 } 273 274 return l, nil 275 } 276 277 // getpeername issues a getpeername unix. 278 func getpeername(fd int, buf []byte) (uint32, error) { 279 l := uint32(len(buf)) 280 _, _, e := unix.RawSyscall(unix.SYS_GETPEERNAME, uintptr(fd), uintptr(unsafe.Pointer(&buf[0])), uintptr(unsafe.Pointer(&l))) 281 if e != 0 { 282 return 0, e 283 } 284 285 return l, nil 286 }