github.com/google/netstack@v0.0.0-20191123085552-55fcc16cd0eb/tcpip/link/sharedmem/sharedmem.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // +build linux 16 17 // Package sharedmem provides the implemention of data-link layer endpoints 18 // backed by shared memory. 19 // 20 // Shared memory endpoints can be used in the networking stack by calling New() 21 // to create a new endpoint, and then passing it as an argument to 22 // Stack.CreateNIC(). 23 package sharedmem 24 25 import ( 26 "sync" 27 "sync/atomic" 28 "syscall" 29 30 "github.com/google/netstack/tcpip" 31 "github.com/google/netstack/tcpip/buffer" 32 "github.com/google/netstack/tcpip/header" 33 "github.com/google/netstack/tcpip/link/sharedmem/queue" 34 "github.com/google/netstack/tcpip/stack" 35 "log" 36 ) 37 38 // QueueConfig holds all the file descriptors needed to describe a tx or rx 39 // queue over shared memory. It is used when creating new shared memory 40 // endpoints to describe tx and rx queues. 41 type QueueConfig struct { 42 // DataFD is a file descriptor for the file that contains the data to 43 // be transmitted via this queue. Descriptors contain offsets within 44 // this file. 45 DataFD int 46 47 // EventFD is a file descriptor for the event that is signaled when 48 // data is becomes available in this queue. 49 EventFD int 50 51 // TxPipeFD is a file descriptor for the tx pipe associated with the 52 // queue. 53 TxPipeFD int 54 55 // RxPipeFD is a file descriptor for the rx pipe associated with the 56 // queue. 57 RxPipeFD int 58 59 // SharedDataFD is a file descriptor for the file that contains shared 60 // state between the two ends of the queue. This data specifies, for 61 // example, whether EventFD signaling is enabled or disabled. 62 SharedDataFD int 63 } 64 65 type endpoint struct { 66 // mtu (maximum transmission unit) is the maximum size of a packet. 67 mtu uint32 68 69 // bufferSize is the size of each individual buffer. 70 bufferSize uint32 71 72 // addr is the local address of this endpoint. 73 addr tcpip.LinkAddress 74 75 // rx is the receive queue. 76 rx rx 77 78 // stopRequested is to be accessed atomically only, and determines if 79 // the worker goroutines should stop. 80 stopRequested uint32 81 82 // Wait group used to indicate that all workers have stopped. 83 completed sync.WaitGroup 84 85 // mu protects the following fields. 86 mu sync.Mutex 87 88 // tx is the transmit queue. 89 tx tx 90 91 // workerStarted specifies whether the worker goroutine was started. 92 workerStarted bool 93 } 94 95 // New creates a new shared-memory-based endpoint. Buffers will be broken up 96 // into buffers of "bufferSize" bytes. 97 func New(mtu, bufferSize uint32, addr tcpip.LinkAddress, tx, rx QueueConfig) (stack.LinkEndpoint, error) { 98 e := &endpoint{ 99 mtu: mtu, 100 bufferSize: bufferSize, 101 addr: addr, 102 } 103 104 if err := e.tx.init(bufferSize, &tx); err != nil { 105 return nil, err 106 } 107 108 if err := e.rx.init(bufferSize, &rx); err != nil { 109 e.tx.cleanup() 110 return nil, err 111 } 112 113 return e, nil 114 } 115 116 // Close frees all resources associated with the endpoint. 117 func (e *endpoint) Close() { 118 // Tell dispatch goroutine to stop, then write to the eventfd so that 119 // it wakes up in case it's sleeping. 120 atomic.StoreUint32(&e.stopRequested, 1) 121 syscall.Write(e.rx.eventFD, []byte{1, 0, 0, 0, 0, 0, 0, 0}) 122 123 // Cleanup the queues inline if the worker hasn't started yet; we also 124 // know it won't start from now on because stopRequested is set to 1. 125 e.mu.Lock() 126 workerPresent := e.workerStarted 127 e.mu.Unlock() 128 129 if !workerPresent { 130 e.tx.cleanup() 131 e.rx.cleanup() 132 } 133 } 134 135 // Wait implements stack.LinkEndpoint.Wait. It waits until all workers have 136 // stopped after a Close() call. 137 func (e *endpoint) Wait() { 138 e.completed.Wait() 139 } 140 141 // Attach implements stack.LinkEndpoint.Attach. It launches the goroutine that 142 // reads packets from the rx queue. 143 func (e *endpoint) Attach(dispatcher stack.NetworkDispatcher) { 144 e.mu.Lock() 145 if !e.workerStarted && atomic.LoadUint32(&e.stopRequested) == 0 { 146 e.workerStarted = true 147 e.completed.Add(1) 148 // Link endpoints are not savable. When transportation endpoints 149 // are saved, they stop sending outgoing packets and all 150 // incoming packets are rejected. 151 go e.dispatchLoop(dispatcher) 152 } 153 e.mu.Unlock() 154 } 155 156 // IsAttached implements stack.LinkEndpoint.IsAttached. 157 func (e *endpoint) IsAttached() bool { 158 e.mu.Lock() 159 defer e.mu.Unlock() 160 return e.workerStarted 161 } 162 163 // MTU implements stack.LinkEndpoint.MTU. It returns the value initialized 164 // during construction. 165 func (e *endpoint) MTU() uint32 { 166 return e.mtu - header.EthernetMinimumSize 167 } 168 169 // Capabilities implements stack.LinkEndpoint.Capabilities. 170 func (*endpoint) Capabilities() stack.LinkEndpointCapabilities { 171 return 0 172 } 173 174 // MaxHeaderLength implements stack.LinkEndpoint.MaxHeaderLength. It returns the 175 // ethernet frame header size. 176 func (*endpoint) MaxHeaderLength() uint16 { 177 return header.EthernetMinimumSize 178 } 179 180 // LinkAddress implements stack.LinkEndpoint.LinkAddress. It returns the local 181 // link address. 182 func (e *endpoint) LinkAddress() tcpip.LinkAddress { 183 return e.addr 184 } 185 186 // WritePacket writes outbound packets to the file descriptor. If it is not 187 // currently writable, the packet is dropped. 188 func (e *endpoint) WritePacket(r *stack.Route, _ *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt tcpip.PacketBuffer) *tcpip.Error { 189 // Add the ethernet header here. 190 eth := header.Ethernet(pkt.Header.Prepend(header.EthernetMinimumSize)) 191 pkt.LinkHeader = buffer.View(eth) 192 ethHdr := &header.EthernetFields{ 193 DstAddr: r.RemoteLinkAddress, 194 Type: protocol, 195 } 196 if r.LocalLinkAddress != "" { 197 ethHdr.SrcAddr = r.LocalLinkAddress 198 } else { 199 ethHdr.SrcAddr = e.addr 200 } 201 eth.Encode(ethHdr) 202 203 v := pkt.Data.ToView() 204 // Transmit the packet. 205 e.mu.Lock() 206 ok := e.tx.transmit(pkt.Header.View(), v) 207 e.mu.Unlock() 208 209 if !ok { 210 return tcpip.ErrWouldBlock 211 } 212 213 return nil 214 } 215 216 // WritePackets implements stack.LinkEndpoint.WritePackets. 217 func (e *endpoint) WritePackets(r *stack.Route, _ *stack.GSO, hdrs []stack.PacketDescriptor, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) { 218 panic("not implemented") 219 } 220 221 // WriteRawPacket implements stack.LinkEndpoint.WriteRawPacket. 222 func (e *endpoint) WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error { 223 v := vv.ToView() 224 // Transmit the packet. 225 e.mu.Lock() 226 ok := e.tx.transmit(v, buffer.View{}) 227 e.mu.Unlock() 228 229 if !ok { 230 return tcpip.ErrWouldBlock 231 } 232 233 return nil 234 } 235 236 // dispatchLoop reads packets from the rx queue in a loop and dispatches them 237 // to the network stack. 238 func (e *endpoint) dispatchLoop(d stack.NetworkDispatcher) { 239 // Post initial set of buffers. 240 limit := e.rx.q.PostedBuffersLimit() 241 if l := uint64(len(e.rx.data)) / uint64(e.bufferSize); limit > l { 242 limit = l 243 } 244 for i := uint64(0); i < limit; i++ { 245 b := queue.RxBuffer{ 246 Offset: i * uint64(e.bufferSize), 247 Size: e.bufferSize, 248 ID: i, 249 } 250 if !e.rx.q.PostBuffers([]queue.RxBuffer{b}) { 251 log.Printf("Unable to post %v-th buffer", i) 252 } 253 } 254 255 // Read in a loop until a stop is requested. 256 var rxb []queue.RxBuffer 257 for atomic.LoadUint32(&e.stopRequested) == 0 { 258 var n uint32 259 rxb, n = e.rx.postAndReceive(rxb, &e.stopRequested) 260 261 // Copy data from the shared area to its own buffer, then 262 // prepare to repost the buffer. 263 b := make([]byte, n) 264 offset := uint32(0) 265 for i := range rxb { 266 copy(b[offset:], e.rx.data[rxb[i].Offset:][:rxb[i].Size]) 267 offset += rxb[i].Size 268 269 rxb[i].Size = e.bufferSize 270 } 271 272 if n < header.EthernetMinimumSize { 273 continue 274 } 275 276 // Send packet up the stack. 277 eth := header.Ethernet(b[:header.EthernetMinimumSize]) 278 d.DeliverNetworkPacket(e, eth.SourceAddress(), eth.DestinationAddress(), eth.Type(), tcpip.PacketBuffer{ 279 Data: buffer.View(b[header.EthernetMinimumSize:]).ToVectorisedView(), 280 LinkHeader: buffer.View(eth), 281 }) 282 } 283 284 // Clean state. 285 e.tx.cleanup() 286 e.rx.cleanup() 287 288 e.completed.Done() 289 }