github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/tcpip/link/sharedmem/sharedmem.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // +build linux 16 17 // Package sharedmem provides the implemention of data-link layer endpoints 18 // backed by shared memory. 19 // 20 // Shared memory endpoints can be used in the networking stack by calling New() 21 // to create a new endpoint, and then passing it as an argument to 22 // Stack.CreateNIC(). 23 package sharedmem 24 25 import ( 26 "sync/atomic" 27 28 "golang.org/x/sys/unix" 29 "github.com/SagerNet/gvisor/pkg/log" 30 "github.com/SagerNet/gvisor/pkg/sync" 31 "github.com/SagerNet/gvisor/pkg/tcpip" 32 "github.com/SagerNet/gvisor/pkg/tcpip/buffer" 33 "github.com/SagerNet/gvisor/pkg/tcpip/header" 34 "github.com/SagerNet/gvisor/pkg/tcpip/link/sharedmem/queue" 35 "github.com/SagerNet/gvisor/pkg/tcpip/stack" 36 ) 37 38 // QueueConfig holds all the file descriptors needed to describe a tx or rx 39 // queue over shared memory. It is used when creating new shared memory 40 // endpoints to describe tx and rx queues. 41 type QueueConfig struct { 42 // DataFD is a file descriptor for the file that contains the data to 43 // be transmitted via this queue. Descriptors contain offsets within 44 // this file. 45 DataFD int 46 47 // EventFD is a file descriptor for the event that is signaled when 48 // data is becomes available in this queue. 49 EventFD int 50 51 // TxPipeFD is a file descriptor for the tx pipe associated with the 52 // queue. 53 TxPipeFD int 54 55 // RxPipeFD is a file descriptor for the rx pipe associated with the 56 // queue. 57 RxPipeFD int 58 59 // SharedDataFD is a file descriptor for the file that contains shared 60 // state between the two ends of the queue. This data specifies, for 61 // example, whether EventFD signaling is enabled or disabled. 62 SharedDataFD int 63 } 64 65 type endpoint struct { 66 // mtu (maximum transmission unit) is the maximum size of a packet. 67 mtu uint32 68 69 // bufferSize is the size of each individual buffer. 70 bufferSize uint32 71 72 // addr is the local address of this endpoint. 73 addr tcpip.LinkAddress 74 75 // rx is the receive queue. 76 rx rx 77 78 // stopRequested is to be accessed atomically only, and determines if 79 // the worker goroutines should stop. 80 stopRequested uint32 81 82 // Wait group used to indicate that all workers have stopped. 83 completed sync.WaitGroup 84 85 // mu protects the following fields. 86 mu sync.Mutex 87 88 // tx is the transmit queue. 89 tx tx 90 91 // workerStarted specifies whether the worker goroutine was started. 92 workerStarted bool 93 } 94 95 // New creates a new shared-memory-based endpoint. Buffers will be broken up 96 // into buffers of "bufferSize" bytes. 97 func New(mtu, bufferSize uint32, addr tcpip.LinkAddress, tx, rx QueueConfig) (stack.LinkEndpoint, error) { 98 e := &endpoint{ 99 mtu: mtu, 100 bufferSize: bufferSize, 101 addr: addr, 102 } 103 104 if err := e.tx.init(bufferSize, &tx); err != nil { 105 return nil, err 106 } 107 108 if err := e.rx.init(bufferSize, &rx); err != nil { 109 e.tx.cleanup() 110 return nil, err 111 } 112 113 return e, nil 114 } 115 116 // Close frees all resources associated with the endpoint. 117 func (e *endpoint) Close() { 118 // Tell dispatch goroutine to stop, then write to the eventfd so that 119 // it wakes up in case it's sleeping. 120 atomic.StoreUint32(&e.stopRequested, 1) 121 unix.Write(e.rx.eventFD, []byte{1, 0, 0, 0, 0, 0, 0, 0}) 122 123 // Cleanup the queues inline if the worker hasn't started yet; we also 124 // know it won't start from now on because stopRequested is set to 1. 125 e.mu.Lock() 126 workerPresent := e.workerStarted 127 e.mu.Unlock() 128 129 if !workerPresent { 130 e.tx.cleanup() 131 e.rx.cleanup() 132 } 133 } 134 135 // Wait implements stack.LinkEndpoint.Wait. It waits until all workers have 136 // stopped after a Close() call. 137 func (e *endpoint) Wait() { 138 e.completed.Wait() 139 } 140 141 // Attach implements stack.LinkEndpoint.Attach. It launches the goroutine that 142 // reads packets from the rx queue. 143 func (e *endpoint) Attach(dispatcher stack.NetworkDispatcher) { 144 e.mu.Lock() 145 if !e.workerStarted && atomic.LoadUint32(&e.stopRequested) == 0 { 146 e.workerStarted = true 147 e.completed.Add(1) 148 // Link endpoints are not savable. When transportation endpoints 149 // are saved, they stop sending outgoing packets and all 150 // incoming packets are rejected. 151 go e.dispatchLoop(dispatcher) // S/R-SAFE: see above. 152 } 153 e.mu.Unlock() 154 } 155 156 // IsAttached implements stack.LinkEndpoint.IsAttached. 157 func (e *endpoint) IsAttached() bool { 158 e.mu.Lock() 159 defer e.mu.Unlock() 160 return e.workerStarted 161 } 162 163 // MTU implements stack.LinkEndpoint.MTU. It returns the value initialized 164 // during construction. 165 func (e *endpoint) MTU() uint32 { 166 return e.mtu - header.EthernetMinimumSize 167 } 168 169 // Capabilities implements stack.LinkEndpoint.Capabilities. 170 func (*endpoint) Capabilities() stack.LinkEndpointCapabilities { 171 return 0 172 } 173 174 // MaxHeaderLength implements stack.LinkEndpoint.MaxHeaderLength. It returns the 175 // ethernet frame header size. 176 func (*endpoint) MaxHeaderLength() uint16 { 177 return header.EthernetMinimumSize 178 } 179 180 // LinkAddress implements stack.LinkEndpoint.LinkAddress. It returns the local 181 // link address. 182 func (e *endpoint) LinkAddress() tcpip.LinkAddress { 183 return e.addr 184 } 185 186 // AddHeader implements stack.LinkEndpoint.AddHeader. 187 func (e *endpoint) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) { 188 // Add ethernet header if needed. 189 eth := header.Ethernet(pkt.LinkHeader().Push(header.EthernetMinimumSize)) 190 ethHdr := &header.EthernetFields{ 191 DstAddr: remote, 192 Type: protocol, 193 } 194 195 // Preserve the src address if it's set in the route. 196 if local != "" { 197 ethHdr.SrcAddr = local 198 } else { 199 ethHdr.SrcAddr = e.addr 200 } 201 eth.Encode(ethHdr) 202 } 203 204 // WritePacket writes outbound packets to the file descriptor. If it is not 205 // currently writable, the packet is dropped. 206 func (e *endpoint) WritePacket(r stack.RouteInfo, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) tcpip.Error { 207 e.AddHeader(r.LocalLinkAddress, r.RemoteLinkAddress, protocol, pkt) 208 209 views := pkt.Views() 210 // Transmit the packet. 211 e.mu.Lock() 212 ok := e.tx.transmit(views...) 213 e.mu.Unlock() 214 215 if !ok { 216 return &tcpip.ErrWouldBlock{} 217 } 218 219 return nil 220 } 221 222 // WritePackets implements stack.LinkEndpoint.WritePackets. 223 func (*endpoint) WritePackets(stack.RouteInfo, stack.PacketBufferList, tcpip.NetworkProtocolNumber) (int, tcpip.Error) { 224 panic("not implemented") 225 } 226 227 // dispatchLoop reads packets from the rx queue in a loop and dispatches them 228 // to the network stack. 229 func (e *endpoint) dispatchLoop(d stack.NetworkDispatcher) { 230 // Post initial set of buffers. 231 limit := e.rx.q.PostedBuffersLimit() 232 if l := uint64(len(e.rx.data)) / uint64(e.bufferSize); limit > l { 233 limit = l 234 } 235 for i := uint64(0); i < limit; i++ { 236 b := queue.RxBuffer{ 237 Offset: i * uint64(e.bufferSize), 238 Size: e.bufferSize, 239 ID: i, 240 } 241 if !e.rx.q.PostBuffers([]queue.RxBuffer{b}) { 242 log.Warningf("Unable to post %v-th buffer", i) 243 } 244 } 245 246 // Read in a loop until a stop is requested. 247 var rxb []queue.RxBuffer 248 for atomic.LoadUint32(&e.stopRequested) == 0 { 249 var n uint32 250 rxb, n = e.rx.postAndReceive(rxb, &e.stopRequested) 251 252 // Copy data from the shared area to its own buffer, then 253 // prepare to repost the buffer. 254 b := make([]byte, n) 255 offset := uint32(0) 256 for i := range rxb { 257 copy(b[offset:], e.rx.data[rxb[i].Offset:][:rxb[i].Size]) 258 offset += rxb[i].Size 259 260 rxb[i].Size = e.bufferSize 261 } 262 263 pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{ 264 Data: buffer.View(b).ToVectorisedView(), 265 }) 266 267 hdr, ok := pkt.LinkHeader().Consume(header.EthernetMinimumSize) 268 if !ok { 269 continue 270 } 271 eth := header.Ethernet(hdr) 272 273 // Send packet up the stack. 274 d.DeliverNetworkPacket(eth.SourceAddress(), eth.DestinationAddress(), eth.Type(), pkt) 275 } 276 277 // Clean state. 278 e.tx.cleanup() 279 e.rx.cleanup() 280 281 e.completed.Done() 282 } 283 284 // ARPHardwareType implements stack.LinkEndpoint.ARPHardwareType 285 func (*endpoint) ARPHardwareType() header.ARPHardwareType { 286 return header.ARPHardwareEther 287 }