github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/unet/unet_unsafe.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package unet 16 17 import ( 18 "io" 19 "sync/atomic" 20 "unsafe" 21 22 "golang.org/x/sys/unix" 23 ) 24 25 // wait blocks until the socket FD is ready for reading or writing, depending 26 // on the value of write. 27 // 28 // Returns errClosing if the Socket is in the process of closing. 29 func (s *Socket) wait(write bool) error { 30 for { 31 // Checking the FD on each loop is not strictly necessary, it 32 // just avoids an extra poll call. 33 fd := atomic.LoadInt32(&s.fd) 34 if fd < 0 { 35 return errClosing 36 } 37 38 events := []unix.PollFd{ 39 { 40 // The actual socket FD. 41 Fd: fd, 42 Events: unix.POLLIN, 43 }, 44 { 45 // The eventfd, signaled when we are closing. 46 Fd: int32(s.efd), 47 Events: unix.POLLIN, 48 }, 49 } 50 if write { 51 events[0].Events = unix.POLLOUT 52 } 53 54 _, _, e := unix.Syscall6(unix.SYS_PPOLL, uintptr(unsafe.Pointer(&events[0])), 2, 0, 0, 0, 0) 55 if e == unix.EINTR { 56 continue 57 } 58 if e != 0 { 59 return e 60 } 61 62 if events[1].Revents&unix.POLLIN == unix.POLLIN { 63 // eventfd signaled, we're closing. 64 return errClosing 65 } 66 67 return nil 68 } 69 } 70 71 // buildIovec builds an iovec slice from the given []byte slice. 72 // 73 // iovecs is used as an initial slice, to avoid excessive allocations. 74 func buildIovec(bufs [][]byte, iovecs []unix.Iovec) ([]unix.Iovec, int) { 75 var length int 76 for i := range bufs { 77 if l := len(bufs[i]); l > 0 { 78 iovecs = append(iovecs, unix.Iovec{ 79 Base: &bufs[i][0], 80 Len: uint64(l), 81 }) 82 length += l 83 } 84 } 85 return iovecs, length 86 } 87 88 // ReadVec reads into the pre-allocated bufs. Returns bytes read. 89 // 90 // The pre-allocatted space used by ReadVec is based upon slice lengths. 91 // 92 // This function is not guaranteed to read all available data, it 93 // returns as soon as a single recvmsg call succeeds. 94 func (r *SocketReader) ReadVec(bufs [][]byte) (int, error) { 95 iovecs, length := buildIovec(bufs, make([]unix.Iovec, 0, 2)) 96 97 var msg unix.Msghdr 98 if len(r.source) != 0 { 99 msg.Name = &r.source[0] 100 msg.Namelen = uint32(len(r.source)) 101 } 102 103 if len(r.ControlMessage) != 0 { 104 msg.Control = &r.ControlMessage[0] 105 msg.Controllen = uint64(len(r.ControlMessage)) 106 } 107 108 if len(iovecs) != 0 { 109 msg.Iov = &iovecs[0] 110 msg.Iovlen = uint64(len(iovecs)) 111 } 112 113 // n is the bytes received. 114 var n uintptr 115 116 fd, ok := r.socket.enterFD() 117 if !ok { 118 return 0, unix.EBADF 119 } 120 // Leave on returns below. 121 for { 122 var e unix.Errno 123 124 // Try a non-blocking recv first, so we don't give up the go runtime M. 125 n, _, e = unix.RawSyscall(unix.SYS_RECVMSG, uintptr(fd), uintptr(unsafe.Pointer(&msg)), unix.MSG_DONTWAIT|unix.MSG_TRUNC) 126 if e == 0 { 127 break 128 } 129 if e == unix.EINTR { 130 continue 131 } 132 if !r.blocking { 133 r.socket.gate.Leave() 134 return 0, e 135 } 136 if e != unix.EAGAIN && e != unix.EWOULDBLOCK { 137 r.socket.gate.Leave() 138 return 0, e 139 } 140 141 // Wait for the socket to become readable. 142 err := r.socket.wait(false) 143 if err == errClosing { 144 err = unix.EBADF 145 } 146 if err != nil { 147 r.socket.gate.Leave() 148 return 0, err 149 } 150 } 151 152 r.socket.gate.Leave() 153 154 if msg.Controllen < uint64(len(r.ControlMessage)) { 155 r.ControlMessage = r.ControlMessage[:msg.Controllen] 156 } 157 158 if msg.Namelen < uint32(len(r.source)) { 159 r.source = r.source[:msg.Namelen] 160 } 161 162 // All unet sockets are SOCK_STREAM or SOCK_SEQPACKET, both of which 163 // indicate that the other end is closed by returning a 0 length read 164 // with no error. 165 if n == 0 { 166 return 0, io.EOF 167 } 168 169 if r.race != nil { 170 // See comments on Socket.race. 171 atomic.AddInt32(r.race, 1) 172 } 173 174 if int(n) > length { 175 return length, errMessageTruncated 176 } 177 178 return int(n), nil 179 } 180 181 // WriteVec writes the bufs to the socket. Returns bytes written. 182 // 183 // This function is not guaranteed to send all data, it returns 184 // as soon as a single sendmsg call succeeds. 185 func (w *SocketWriter) WriteVec(bufs [][]byte) (int, error) { 186 iovecs, _ := buildIovec(bufs, make([]unix.Iovec, 0, 2)) 187 188 if w.race != nil { 189 // See comments on Socket.race. 190 atomic.AddInt32(w.race, 1) 191 } 192 193 var msg unix.Msghdr 194 if len(w.to) != 0 { 195 msg.Name = &w.to[0] 196 msg.Namelen = uint32(len(w.to)) 197 } 198 199 if len(w.ControlMessage) != 0 { 200 msg.Control = &w.ControlMessage[0] 201 msg.Controllen = uint64(len(w.ControlMessage)) 202 } 203 204 if len(iovecs) > 0 { 205 msg.Iov = &iovecs[0] 206 msg.Iovlen = uint64(len(iovecs)) 207 } 208 209 fd, ok := w.socket.enterFD() 210 if !ok { 211 return 0, unix.EBADF 212 } 213 // Leave on returns below. 214 for { 215 // Try a non-blocking send first, so we don't give up the go runtime M. 216 n, _, e := unix.RawSyscall(unix.SYS_SENDMSG, uintptr(fd), uintptr(unsafe.Pointer(&msg)), unix.MSG_DONTWAIT|unix.MSG_NOSIGNAL) 217 if e == 0 { 218 w.socket.gate.Leave() 219 return int(n), nil 220 } 221 if e == unix.EINTR { 222 continue 223 } 224 if !w.blocking { 225 w.socket.gate.Leave() 226 return 0, e 227 } 228 if e != unix.EAGAIN && e != unix.EWOULDBLOCK { 229 w.socket.gate.Leave() 230 return 0, e 231 } 232 233 // Wait for the socket to become writeable. 234 err := w.socket.wait(true) 235 if err == errClosing { 236 err = unix.EBADF 237 } 238 if err != nil { 239 w.socket.gate.Leave() 240 return 0, err 241 } 242 } 243 // Unreachable, no s.gate.Leave needed. 244 } 245 246 // getsockopt issues a getsockopt unix. 247 func getsockopt(fd int, level int, optname int, buf []byte) (uint32, error) { 248 l := uint32(len(buf)) 249 _, _, e := unix.RawSyscall6(unix.SYS_GETSOCKOPT, uintptr(fd), uintptr(level), uintptr(optname), uintptr(unsafe.Pointer(&buf[0])), uintptr(unsafe.Pointer(&l)), 0) 250 if e != 0 { 251 return 0, e 252 } 253 254 return l, nil 255 } 256 257 // setsockopt issues a setsockopt unix. 258 func setsockopt(fd int, level int, optname int, buf []byte) error { 259 _, _, e := unix.RawSyscall6(unix.SYS_SETSOCKOPT, uintptr(fd), uintptr(level), uintptr(optname), uintptr(unsafe.Pointer(&buf[0])), uintptr(len(buf)), 0) 260 if e != 0 { 261 return e 262 } 263 264 return nil 265 } 266 267 // getsockname issues a getsockname unix. 268 func getsockname(fd int, buf []byte) (uint32, error) { 269 l := uint32(len(buf)) 270 _, _, e := unix.RawSyscall(unix.SYS_GETSOCKNAME, uintptr(fd), uintptr(unsafe.Pointer(&buf[0])), uintptr(unsafe.Pointer(&l))) 271 if e != 0 { 272 return 0, e 273 } 274 275 return l, nil 276 } 277 278 // getpeername issues a getpeername unix. 279 func getpeername(fd int, buf []byte) (uint32, error) { 280 l := uint32(len(buf)) 281 _, _, e := unix.RawSyscall(unix.SYS_GETPEERNAME, uintptr(fd), uintptr(unsafe.Pointer(&buf[0])), uintptr(unsafe.Pointer(&l))) 282 if e != 0 { 283 return 0, e 284 } 285 286 return l, nil 287 }