github.com/FlowerWrong/netstack@v0.0.0-20191009141956-e5848263af28/tcpip/link/sharedmem/sharedmem.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// +build linux

// Package sharedmem provides the implementation of data-link layer endpoints
// backed by shared memory.
//
// Shared memory endpoints can be used in the networking stack by calling New()
// to create a new endpoint, and then passing it as an argument to
// Stack.CreateNIC().
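//
// A hedged usage sketch; the stack s, the link address, and both QueueConfig
// values are assumed to already exist and are not defined in this file:
//
//	ep, err := sharedmem.New(mtu, bufferSize, linkAddr, txConfig, rxConfig)
//	if err != nil {
//		log.Fatal(err)
//	}
//	if err := s.CreateNIC(1, ep); err != nil {
//		log.Fatal(err)
//	}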
package sharedmem

import (
	"log"
	"sync"
	"sync/atomic"
	"syscall"

	"github.com/FlowerWrong/netstack/tcpip"
	"github.com/FlowerWrong/netstack/tcpip/buffer"
	"github.com/FlowerWrong/netstack/tcpip/header"
	"github.com/FlowerWrong/netstack/tcpip/link/sharedmem/queue"
	"github.com/FlowerWrong/netstack/tcpip/stack"
)

// QueueConfig holds all the file descriptors needed to describe a tx or rx
// queue over shared memory. It is passed to New() when creating a shared
// memory endpoint.
type QueueConfig struct {
	// DataFD is a file descriptor for the file that contains the data to
	// be transmitted via this queue. Descriptors contain offsets within
	// this file.
	DataFD int

	// EventFD is a file descriptor for the event that is signaled when
	// data becomes available in this queue.
	EventFD int

	// TxPipeFD is a file descriptor for the tx pipe associated with the
	// queue.
	TxPipeFD int

	// RxPipeFD is a file descriptor for the rx pipe associated with the
	// queue.
	RxPipeFD int

	// SharedDataFD is a file descriptor for the file that contains shared
	// state between the two ends of the queue. This data specifies, for
	// example, whether EventFD signaling is enabled or disabled.
	SharedDataFD int
}
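
// A hedged sketch of populating a QueueConfig. How the descriptors are
// created and shared with the peer process is outside this file; all names
// below are placeholders:
//
//	cfg := QueueConfig{
//		DataFD:       dataFD,   // packet bytes; descriptors carry offsets into this file
//		EventFD:      eventFD,  // signaled when data becomes available
//		TxPipeFD:     txPipeFD, // tx pipe region for this queue
//		RxPipeFD:     rxPipeFD, // rx pipe region for this queue
//		SharedDataFD: sharedFD, // shared flags, e.g. whether EventFD signaling is enabled
//	}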

type endpoint struct {
	// mtu (maximum transmission unit) is the maximum size of a packet.
	mtu uint32

	// bufferSize is the size of each individual buffer.
	bufferSize uint32

	// addr is the local address of this endpoint.
	addr tcpip.LinkAddress

	// rx is the receive queue.
	rx rx

	// stopRequested is to be accessed atomically only, and determines if
	// the worker goroutines should stop.
	stopRequested uint32

	// completed is a wait group used to indicate that all workers have
	// stopped.
	completed sync.WaitGroup

	// mu protects the following fields.
	mu sync.Mutex

	// tx is the transmit queue.
	tx tx

	// workerStarted specifies whether the worker goroutine was started.
	workerStarted bool
}

// New creates a new shared-memory-based endpoint. Packets will be broken up
// into buffers of "bufferSize" bytes.
func New(mtu, bufferSize uint32, addr tcpip.LinkAddress, tx, rx QueueConfig) (stack.LinkEndpoint, error) {
	e := &endpoint{
		mtu:        mtu,
		bufferSize: bufferSize,
		addr:       addr,
	}

	if err := e.tx.init(bufferSize, &tx); err != nil {
		return nil, err
	}

	if err := e.rx.init(bufferSize, &rx); err != nil {
		e.tx.cleanup()
		return nil, err
	}

	return e, nil
}

// Close frees all resources associated with the endpoint.
func (e *endpoint) Close() {
	// Tell the dispatch goroutine to stop, then write to the eventfd so
	// that it wakes up in case it's sleeping.
	atomic.StoreUint32(&e.stopRequested, 1)
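	// An eventfd write must be exactly 8 bytes: the 64-bit value to add
	// to the counter (1 here, assuming a little-endian host).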
	syscall.Write(e.rx.eventFD, []byte{1, 0, 0, 0, 0, 0, 0, 0})

	// Clean up the queues inline if the worker hasn't started yet; we
	// also know it won't start from now on because stopRequested is set
	// to 1.
	e.mu.Lock()
	workerPresent := e.workerStarted
	e.mu.Unlock()

	if !workerPresent {
		e.tx.cleanup()
		e.rx.cleanup()
	}
}

// Wait implements stack.LinkEndpoint.Wait. It waits until all workers have
// stopped after a Close() call.
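//
// A hedged shutdown sketch (assuming the caller kept a reference to the
// concrete endpoint, since Close is not part of the stack.LinkEndpoint
// interface here):
//
//	ep.Close() // request stop and wake the sleeping dispatcher
//	ep.Wait()  // block until the dispatch goroutine has cleaned up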
func (e *endpoint) Wait() {
	e.completed.Wait()
}

// Attach implements stack.LinkEndpoint.Attach. It launches the goroutine that
// reads packets from the rx queue.
func (e *endpoint) Attach(dispatcher stack.NetworkDispatcher) {
	e.mu.Lock()
	if !e.workerStarted && atomic.LoadUint32(&e.stopRequested) == 0 {
		e.workerStarted = true
		e.completed.Add(1)
		// Link endpoints are not savable. When transport endpoints are
		// saved, they stop sending outgoing packets and all incoming
		// packets are rejected.
		go e.dispatchLoop(dispatcher)
	}
	e.mu.Unlock()
}

// IsAttached implements stack.LinkEndpoint.IsAttached.
func (e *endpoint) IsAttached() bool {
	e.mu.Lock()
	defer e.mu.Unlock()
	return e.workerStarted
}

// MTU implements stack.LinkEndpoint.MTU. It returns the value initialized
// during construction, minus the Ethernet header size.
func (e *endpoint) MTU() uint32 {
	return e.mtu - header.EthernetMinimumSize
}

// Capabilities implements stack.LinkEndpoint.Capabilities.
func (*endpoint) Capabilities() stack.LinkEndpointCapabilities {
	return 0
}

// MaxHeaderLength implements stack.LinkEndpoint.MaxHeaderLength. It returns
// the Ethernet frame header size.
func (*endpoint) MaxHeaderLength() uint16 {
	return header.EthernetMinimumSize
}

// LinkAddress implements stack.LinkEndpoint.LinkAddress. It returns the local
// link address.
func (e *endpoint) LinkAddress() tcpip.LinkAddress {
	return e.addr
}

// WritePacket writes an outbound packet to the shared-memory transmit queue.
// If the queue is full, the packet is dropped and tcpip.ErrWouldBlock is
// returned.
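//
// A hedged caller-side sketch; r, hdr, payload and proto are assumed to be
// in scope:
//
//	if err := ep.WritePacket(r, nil, hdr, payload, proto); err == tcpip.ErrWouldBlock {
//		// tx queue full: the frame was dropped; the caller may retry later
//	}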
func (e *endpoint) WritePacket(r *stack.Route, _ *stack.GSO, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error {
	// Add the ethernet header here.
	eth := header.Ethernet(hdr.Prepend(header.EthernetMinimumSize))
	ethHdr := &header.EthernetFields{
		DstAddr: r.RemoteLinkAddress,
		Type:    protocol,
	}
	if r.LocalLinkAddress != "" {
		ethHdr.SrcAddr = r.LocalLinkAddress
	} else {
		ethHdr.SrcAddr = e.addr
	}
	eth.Encode(ethHdr)

	v := payload.ToView()
	// Transmit the packet.
	e.mu.Lock()
	ok := e.tx.transmit(hdr.View(), v)
	e.mu.Unlock()

	if !ok {
		return tcpip.ErrWouldBlock
	}

	return nil
}

// dispatchLoop reads packets from the rx queue in a loop and dispatches them
// to the network stack.
func (e *endpoint) dispatchLoop(d stack.NetworkDispatcher) {
	// Post the initial set of buffers.
	limit := e.rx.q.PostedBuffersLimit()
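	// Cap the number of posted buffers by how many bufferSize chunks
	// actually fit in the mapped data region.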
	if l := uint64(len(e.rx.data)) / uint64(e.bufferSize); limit > l {
		limit = l
	}
	for i := uint64(0); i < limit; i++ {
		b := queue.RxBuffer{
			Offset: i * uint64(e.bufferSize),
			Size:   e.bufferSize,
			ID:     i,
		}
		if !e.rx.q.PostBuffers([]queue.RxBuffer{b}) {
			log.Printf("Unable to post buffer %v", i)
		}
	}

	// Read in a loop until a stop is requested.
	var rxb []queue.RxBuffer
	for atomic.LoadUint32(&e.stopRequested) == 0 {
		var n uint32
		rxb, n = e.rx.postAndReceive(rxb, &e.stopRequested)

		// Copy data from the shared area into a local buffer, then
		// prepare to repost the buffers. A packet may span several
		// receive buffers, so concatenate them in order.
		b := make([]byte, n)
		offset := uint32(0)
		for i := range rxb {
			copy(b[offset:], e.rx.data[rxb[i].Offset:][:rxb[i].Size])
			offset += rxb[i].Size

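			// Restore the full buffer size before the buffer is
			// reposted to the queue.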
			rxb[i].Size = e.bufferSize
		}

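		// Ignore frames too short to contain a full Ethernet header.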
		if n < header.EthernetMinimumSize {
			continue
		}

		// Send the packet up the stack.
		eth := header.Ethernet(b)
		d.DeliverNetworkPacket(e, eth.SourceAddress(), eth.DestinationAddress(), eth.Type(), buffer.View(b[header.EthernetMinimumSize:]).ToVectorisedView())
	}

	// Clean up state.
	e.tx.cleanup()
	e.rx.cleanup()

	e.completed.Done()
}