github.com/polevpn/netstack@v1.10.9/tcpip/transport/tcp/protocol.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package tcp contains the implementation of the TCP transport protocol. To use
    16  // it in the networking stack, this package must be added to the project, and
    17  // activated on the stack by passing tcp.NewProtocol() as one of the
    18  // transport protocols when calling stack.New(). Then endpoints can be created
    19  // by passing tcp.ProtocolNumber as the transport protocol number when calling
    20  // Stack.NewEndpoint().
    21  package tcp
    22  
    23  import (
    24  	"strings"
    25  	"sync"
    26  	"time"
    27  
    28  	"github.com/polevpn/netstack/tcpip"
    29  	"github.com/polevpn/netstack/tcpip/buffer"
    30  	"github.com/polevpn/netstack/tcpip/header"
    31  	"github.com/polevpn/netstack/tcpip/seqnum"
    32  	"github.com/polevpn/netstack/tcpip/stack"
    33  	"github.com/polevpn/netstack/tcpip/transport/raw"
    34  	"github.com/polevpn/netstack/waiter"
    35  )
    36  
    37  const (
    38  	// ProtocolNumber is the tcp protocol number.
    39  	ProtocolNumber = header.TCPProtocolNumber
    40  
    41  	// MinBufferSize is the smallest size of a receive or send buffer.
    42  	MinBufferSize = 4 << 10 // 4096 bytes.
    43  
    44  	// DefaultSendBufferSize is the default size of the send buffer for
    45  	// an endpoint.
    46  	DefaultSendBufferSize = 1 << 20 // 1MB
    47  
    48  	// DefaultReceiveBufferSize is the default size of the receive buffer
    49  	// for an endpoint.
    50  	DefaultReceiveBufferSize = 1 << 20 // 1MB
    51  
    52  	// MaxBufferSize is the largest size a receive/send buffer can grow to.
    53  	MaxBufferSize = 4 << 20 // 4MB
    54  
    55  	// MaxUnprocessedSegments is the maximum number of unprocessed segments
    56  	// that can be queued for a given endpoint.
    57  	MaxUnprocessedSegments = 300
    58  
    59  	// DefaultTCPLingerTimeout is the amount of time that sockets linger in
    60  	// FIN_WAIT_2 state before being marked closed.
    61  	DefaultTCPLingerTimeout = 60 * time.Second
    62  
    63  	// DefaultTCPTimeWaitTimeout is the amount of time that sockets linger
    64  	// in TIME_WAIT state before being marked closed.
    65  	DefaultTCPTimeWaitTimeout = 60 * time.Second
    66  )
    67  
    68  // SACKEnabled option can be used to enable SACK support in the TCP
    69  // protocol. See: https://tools.ietf.org/html/rfc2018.
    70  type SACKEnabled bool
    71  
    72  // DelayEnabled option can be used to enable Nagle's algorithm in the TCP protocol.
    73  type DelayEnabled bool
    74  
    75  // SendBufferSizeOption allows the default, min and max send buffer sizes for
    76  // TCP endpoints to be queried or configured.
    77  type SendBufferSizeOption struct {
    78  	Min     int
    79  	Default int
    80  	Max     int
    81  }
    82  
    83  // ReceiveBufferSizeOption allows the default, min and max receive buffer size
    84  // for TCP endpoints to be queried or configured.
    85  type ReceiveBufferSizeOption struct {
    86  	Min     int
    87  	Default int
    88  	Max     int
    89  }
    90  
    91  const (
    92  	ccReno  = "reno"
    93  	ccCubic = "cubic"
    94  )
    95  
    96  type protocol struct {
    97  	mu                         sync.Mutex
    98  	sackEnabled                bool
    99  	delayEnabled               bool
   100  	sendBufferSize             SendBufferSizeOption
   101  	recvBufferSize             ReceiveBufferSizeOption
   102  	congestionControl          string
   103  	availableCongestionControl []string
   104  	moderateReceiveBuffer      bool
   105  	tcpLingerTimeout           time.Duration
   106  	tcpTimeWaitTimeout         time.Duration
   107  }
   108  
   109  // Number returns the tcp protocol number.
   110  func (*protocol) Number() tcpip.TransportProtocolNumber {
   111  	return ProtocolNumber
   112  }
   113  
   114  // NewEndpoint creates a new tcp endpoint.
   115  func (p *protocol) NewEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
   116  	return newEndpoint(stack, netProto, waiterQueue), nil
   117  }
   118  
   119  // NewRawEndpoint creates a new raw TCP endpoint. Raw TCP sockets are currently
   120  // unsupported. It implements stack.TransportProtocol.NewRawEndpoint.
   121  func (p *protocol) NewRawEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
   122  	return raw.NewEndpoint(stack, netProto, header.TCPProtocolNumber, waiterQueue)
   123  }
   124  
   125  // MinimumPacketSize returns the minimum valid tcp packet size.
   126  func (*protocol) MinimumPacketSize() int {
   127  	return header.TCPMinimumSize
   128  }
   129  
   130  // ParsePorts returns the source and destination ports stored in the given tcp
   131  // packet.
   132  func (*protocol) ParsePorts(v buffer.View) (src, dst uint16, err *tcpip.Error) {
   133  	h := header.TCP(v)
   134  	return h.SourcePort(), h.DestinationPort(), nil
   135  }
   136  
   137  // HandleUnknownDestinationPacket handles packets targeted at this protocol but
   138  // that don't match any existing endpoint.
   139  //
   140  // RFC 793, page 36, states that "If the connection does not exist (CLOSED) then
   141  // a reset is sent in response to any incoming segment except another reset. In
   142  // particular, SYNs addressed to a non-existent connection are rejected by this
   143  // means."
   144  func (*protocol) HandleUnknownDestinationPacket(r *stack.Route, id stack.TransportEndpointID, pkt tcpip.PacketBuffer) bool {
   145  	s := newSegment(r, id, pkt)
   146  	defer s.decRef()
   147  
   148  	if !s.parse() || !s.csumValid {
   149  		return false
   150  	}
   151  
   152  	// There's nothing to do if this is already a reset packet.
   153  	if s.flagIsSet(header.TCPFlagRst) {
   154  		return true
   155  	}
   156  
   157  	replyWithReset(s)
   158  	return true
   159  }
   160  
   161  // replyWithReset replies to the given segment with a reset segment.
   162  func replyWithReset(s *segment) {
   163  	// Get the seqnum from the packet if the ack flag is set.
   164  	seq := seqnum.Value(0)
   165  	if s.flagIsSet(header.TCPFlagAck) {
   166  		seq = s.ackNumber
   167  	}
   168  
   169  	ack := s.sequenceNumber.Add(s.logicalLen())
   170  
   171  	sendTCP(&s.route, s.id, buffer.VectorisedView{}, s.route.DefaultTTL(), stack.DefaultTOS, header.TCPFlagRst|header.TCPFlagAck, seq, ack, 0 /* rcvWnd */, nil /* options */, nil /* gso */)
   172  }
   173  
   174  // SetOption implements TransportProtocol.SetOption.
   175  func (p *protocol) SetOption(option interface{}) *tcpip.Error {
   176  	switch v := option.(type) {
   177  	case SACKEnabled:
   178  		p.mu.Lock()
   179  		p.sackEnabled = bool(v)
   180  		p.mu.Unlock()
   181  		return nil
   182  
   183  	case DelayEnabled:
   184  		p.mu.Lock()
   185  		p.delayEnabled = bool(v)
   186  		p.mu.Unlock()
   187  		return nil
   188  
   189  	case SendBufferSizeOption:
   190  		if v.Min <= 0 || v.Default < v.Min || v.Default > v.Max {
   191  			return tcpip.ErrInvalidOptionValue
   192  		}
   193  		p.mu.Lock()
   194  		p.sendBufferSize = v
   195  		p.mu.Unlock()
   196  		return nil
   197  
   198  	case ReceiveBufferSizeOption:
   199  		if v.Min <= 0 || v.Default < v.Min || v.Default > v.Max {
   200  			return tcpip.ErrInvalidOptionValue
   201  		}
   202  		p.mu.Lock()
   203  		p.recvBufferSize = v
   204  		p.mu.Unlock()
   205  		return nil
   206  
   207  	case tcpip.CongestionControlOption:
   208  		for _, c := range p.availableCongestionControl {
   209  			if string(v) == c {
   210  				p.mu.Lock()
   211  				p.congestionControl = string(v)
   212  				p.mu.Unlock()
   213  				return nil
   214  			}
   215  		}
   216  		// linux returns ENOENT when an invalid congestion control
   217  		// is specified.
   218  		return tcpip.ErrNoSuchFile
   219  
   220  	case tcpip.ModerateReceiveBufferOption:
   221  		p.mu.Lock()
   222  		p.moderateReceiveBuffer = bool(v)
   223  		p.mu.Unlock()
   224  		return nil
   225  
   226  	case tcpip.TCPLingerTimeoutOption:
   227  		if v < 0 {
   228  			v = 0
   229  		}
   230  		p.mu.Lock()
   231  		p.tcpLingerTimeout = time.Duration(v)
   232  		p.mu.Unlock()
   233  		return nil
   234  
   235  	case tcpip.TCPTimeWaitTimeoutOption:
   236  		if v < 0 {
   237  			v = 0
   238  		}
   239  		p.mu.Lock()
   240  		p.tcpTimeWaitTimeout = time.Duration(v)
   241  		p.mu.Unlock()
   242  		return nil
   243  
   244  	default:
   245  		return tcpip.ErrUnknownProtocolOption
   246  	}
   247  }
   248  
   249  // Option implements TransportProtocol.Option.
   250  func (p *protocol) Option(option interface{}) *tcpip.Error {
   251  	switch v := option.(type) {
   252  	case *SACKEnabled:
   253  		p.mu.Lock()
   254  		*v = SACKEnabled(p.sackEnabled)
   255  		p.mu.Unlock()
   256  		return nil
   257  
   258  	case *DelayEnabled:
   259  		p.mu.Lock()
   260  		*v = DelayEnabled(p.delayEnabled)
   261  		p.mu.Unlock()
   262  		return nil
   263  
   264  	case *SendBufferSizeOption:
   265  		p.mu.Lock()
   266  		*v = p.sendBufferSize
   267  		p.mu.Unlock()
   268  		return nil
   269  
   270  	case *ReceiveBufferSizeOption:
   271  		p.mu.Lock()
   272  		*v = p.recvBufferSize
   273  		p.mu.Unlock()
   274  		return nil
   275  
   276  	case *tcpip.CongestionControlOption:
   277  		p.mu.Lock()
   278  		*v = tcpip.CongestionControlOption(p.congestionControl)
   279  		p.mu.Unlock()
   280  		return nil
   281  
   282  	case *tcpip.AvailableCongestionControlOption:
   283  		p.mu.Lock()
   284  		*v = tcpip.AvailableCongestionControlOption(strings.Join(p.availableCongestionControl, " "))
   285  		p.mu.Unlock()
   286  		return nil
   287  
   288  	case *tcpip.ModerateReceiveBufferOption:
   289  		p.mu.Lock()
   290  		*v = tcpip.ModerateReceiveBufferOption(p.moderateReceiveBuffer)
   291  		p.mu.Unlock()
   292  		return nil
   293  
   294  	case *tcpip.TCPLingerTimeoutOption:
   295  		p.mu.Lock()
   296  		*v = tcpip.TCPLingerTimeoutOption(p.tcpLingerTimeout)
   297  		p.mu.Unlock()
   298  		return nil
   299  
   300  	case *tcpip.TCPTimeWaitTimeoutOption:
   301  		p.mu.Lock()
   302  		*v = tcpip.TCPTimeWaitTimeoutOption(p.tcpTimeWaitTimeout)
   303  		p.mu.Unlock()
   304  		return nil
   305  
   306  	default:
   307  		return tcpip.ErrUnknownProtocolOption
   308  	}
   309  }
   310  
   311  // NewProtocol returns a TCP transport protocol.
   312  func NewProtocol() stack.TransportProtocol {
   313  	return &protocol{
   314  		sendBufferSize:             SendBufferSizeOption{MinBufferSize, DefaultSendBufferSize, MaxBufferSize},
   315  		recvBufferSize:             ReceiveBufferSizeOption{MinBufferSize, DefaultReceiveBufferSize, MaxBufferSize},
   316  		congestionControl:          ccReno,
   317  		availableCongestionControl: []string{ccReno, ccCubic},
   318  		tcpLingerTimeout:           DefaultTCPLingerTimeout,
   319  		tcpTimeWaitTimeout:         DefaultTCPTimeWaitTimeout,
   320  	}
   321  }