github.com/google/netstack@v0.0.0-20191123085552-55fcc16cd0eb/tcpip/transport/tcp/protocol.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package tcp contains the implementation of the TCP transport protocol. To use 16 // it in the networking stack, this package must be added to the project, and 17 // activated on the stack by passing tcp.NewProtocol() as one of the 18 // transport protocols when calling stack.New(). Then endpoints can be created 19 // by passing tcp.ProtocolNumber as the transport protocol number when calling 20 // Stack.NewEndpoint(). 21 package tcp 22 23 import ( 24 "strings" 25 "sync" 26 "time" 27 28 "github.com/google/netstack/tcpip" 29 "github.com/google/netstack/tcpip/buffer" 30 "github.com/google/netstack/tcpip/header" 31 "github.com/google/netstack/tcpip/seqnum" 32 "github.com/google/netstack/tcpip/stack" 33 "github.com/google/netstack/tcpip/transport/raw" 34 "github.com/google/netstack/waiter" 35 ) 36 37 const ( 38 // ProtocolNumber is the tcp protocol number. 39 ProtocolNumber = header.TCPProtocolNumber 40 41 // MinBufferSize is the smallest size of a receive or send buffer. 42 MinBufferSize = 4 << 10 // 4096 bytes. 43 44 // DefaultSendBufferSize is the default size of the send buffer for 45 // an endpoint. 46 DefaultSendBufferSize = 1 << 20 // 1MB 47 48 // DefaultReceiveBufferSize is the default size of the receive buffer 49 // for an endpoint. 50 DefaultReceiveBufferSize = 1 << 20 // 1MB 51 52 // MaxBufferSize is the largest size a receive/send buffer can grow to. 53 MaxBufferSize = 4 << 20 // 4MB 54 55 // MaxUnprocessedSegments is the maximum number of unprocessed segments 56 // that can be queued for a given endpoint. 57 MaxUnprocessedSegments = 300 58 59 // DefaultTCPLingerTimeout is the amount of time that sockets linger in 60 // FIN_WAIT_2 state before being marked closed. 61 DefaultTCPLingerTimeout = 60 * time.Second 62 63 // DefaultTCPTimeWaitTimeout is the amount of time that sockets linger 64 // in TIME_WAIT state before being marked closed. 65 DefaultTCPTimeWaitTimeout = 60 * time.Second 66 ) 67 68 // SACKEnabled option can be used to enable SACK support in the TCP 69 // protocol. See: https://tools.ietf.org/html/rfc2018. 70 type SACKEnabled bool 71 72 // DelayEnabled option can be used to enable Nagle's algorithm in the TCP protocol. 73 type DelayEnabled bool 74 75 // SendBufferSizeOption allows the default, min and max send buffer sizes for 76 // TCP endpoints to be queried or configured. 77 type SendBufferSizeOption struct { 78 Min int 79 Default int 80 Max int 81 } 82 83 // ReceiveBufferSizeOption allows the default, min and max receive buffer size 84 // for TCP endpoints to be queried or configured. 85 type ReceiveBufferSizeOption struct { 86 Min int 87 Default int 88 Max int 89 } 90 91 const ( 92 ccReno = "reno" 93 ccCubic = "cubic" 94 ) 95 96 type protocol struct { 97 mu sync.Mutex 98 sackEnabled bool 99 delayEnabled bool 100 sendBufferSize SendBufferSizeOption 101 recvBufferSize ReceiveBufferSizeOption 102 congestionControl string 103 availableCongestionControl []string 104 moderateReceiveBuffer bool 105 tcpLingerTimeout time.Duration 106 tcpTimeWaitTimeout time.Duration 107 } 108 109 // Number returns the tcp protocol number. 110 func (*protocol) Number() tcpip.TransportProtocolNumber { 111 return ProtocolNumber 112 } 113 114 // NewEndpoint creates a new tcp endpoint. 115 func (p *protocol) NewEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) { 116 return newEndpoint(stack, netProto, waiterQueue), nil 117 } 118 119 // NewRawEndpoint creates a new raw TCP endpoint. Raw TCP sockets are currently 120 // unsupported. It implements stack.TransportProtocol.NewRawEndpoint. 121 func (p *protocol) NewRawEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) { 122 return raw.NewEndpoint(stack, netProto, header.TCPProtocolNumber, waiterQueue) 123 } 124 125 // MinimumPacketSize returns the minimum valid tcp packet size. 126 func (*protocol) MinimumPacketSize() int { 127 return header.TCPMinimumSize 128 } 129 130 // ParsePorts returns the source and destination ports stored in the given tcp 131 // packet. 132 func (*protocol) ParsePorts(v buffer.View) (src, dst uint16, err *tcpip.Error) { 133 h := header.TCP(v) 134 return h.SourcePort(), h.DestinationPort(), nil 135 } 136 137 // HandleUnknownDestinationPacket handles packets targeted at this protocol but 138 // that don't match any existing endpoint. 139 // 140 // RFC 793, page 36, states that "If the connection does not exist (CLOSED) then 141 // a reset is sent in response to any incoming segment except another reset. In 142 // particular, SYNs addressed to a non-existent connection are rejected by this 143 // means." 144 func (*protocol) HandleUnknownDestinationPacket(r *stack.Route, id stack.TransportEndpointID, pkt tcpip.PacketBuffer) bool { 145 s := newSegment(r, id, pkt) 146 defer s.decRef() 147 148 if !s.parse() || !s.csumValid { 149 return false 150 } 151 152 // There's nothing to do if this is already a reset packet. 153 if s.flagIsSet(header.TCPFlagRst) { 154 return true 155 } 156 157 replyWithReset(s) 158 return true 159 } 160 161 // replyWithReset replies to the given segment with a reset segment. 162 func replyWithReset(s *segment) { 163 // Get the seqnum from the packet if the ack flag is set. 164 seq := seqnum.Value(0) 165 if s.flagIsSet(header.TCPFlagAck) { 166 seq = s.ackNumber 167 } 168 169 ack := s.sequenceNumber.Add(s.logicalLen()) 170 171 sendTCP(&s.route, s.id, buffer.VectorisedView{}, s.route.DefaultTTL(), stack.DefaultTOS, header.TCPFlagRst|header.TCPFlagAck, seq, ack, 0 /* rcvWnd */, nil /* options */, nil /* gso */) 172 } 173 174 // SetOption implements TransportProtocol.SetOption. 175 func (p *protocol) SetOption(option interface{}) *tcpip.Error { 176 switch v := option.(type) { 177 case SACKEnabled: 178 p.mu.Lock() 179 p.sackEnabled = bool(v) 180 p.mu.Unlock() 181 return nil 182 183 case DelayEnabled: 184 p.mu.Lock() 185 p.delayEnabled = bool(v) 186 p.mu.Unlock() 187 return nil 188 189 case SendBufferSizeOption: 190 if v.Min <= 0 || v.Default < v.Min || v.Default > v.Max { 191 return tcpip.ErrInvalidOptionValue 192 } 193 p.mu.Lock() 194 p.sendBufferSize = v 195 p.mu.Unlock() 196 return nil 197 198 case ReceiveBufferSizeOption: 199 if v.Min <= 0 || v.Default < v.Min || v.Default > v.Max { 200 return tcpip.ErrInvalidOptionValue 201 } 202 p.mu.Lock() 203 p.recvBufferSize = v 204 p.mu.Unlock() 205 return nil 206 207 case tcpip.CongestionControlOption: 208 for _, c := range p.availableCongestionControl { 209 if string(v) == c { 210 p.mu.Lock() 211 p.congestionControl = string(v) 212 p.mu.Unlock() 213 return nil 214 } 215 } 216 // linux returns ENOENT when an invalid congestion control 217 // is specified. 218 return tcpip.ErrNoSuchFile 219 220 case tcpip.ModerateReceiveBufferOption: 221 p.mu.Lock() 222 p.moderateReceiveBuffer = bool(v) 223 p.mu.Unlock() 224 return nil 225 226 case tcpip.TCPLingerTimeoutOption: 227 if v < 0 { 228 v = 0 229 } 230 p.mu.Lock() 231 p.tcpLingerTimeout = time.Duration(v) 232 p.mu.Unlock() 233 return nil 234 235 case tcpip.TCPTimeWaitTimeoutOption: 236 if v < 0 { 237 v = 0 238 } 239 p.mu.Lock() 240 p.tcpTimeWaitTimeout = time.Duration(v) 241 p.mu.Unlock() 242 return nil 243 244 default: 245 return tcpip.ErrUnknownProtocolOption 246 } 247 } 248 249 // Option implements TransportProtocol.Option. 250 func (p *protocol) Option(option interface{}) *tcpip.Error { 251 switch v := option.(type) { 252 case *SACKEnabled: 253 p.mu.Lock() 254 *v = SACKEnabled(p.sackEnabled) 255 p.mu.Unlock() 256 return nil 257 258 case *DelayEnabled: 259 p.mu.Lock() 260 *v = DelayEnabled(p.delayEnabled) 261 p.mu.Unlock() 262 return nil 263 264 case *SendBufferSizeOption: 265 p.mu.Lock() 266 *v = p.sendBufferSize 267 p.mu.Unlock() 268 return nil 269 270 case *ReceiveBufferSizeOption: 271 p.mu.Lock() 272 *v = p.recvBufferSize 273 p.mu.Unlock() 274 return nil 275 276 case *tcpip.CongestionControlOption: 277 p.mu.Lock() 278 *v = tcpip.CongestionControlOption(p.congestionControl) 279 p.mu.Unlock() 280 return nil 281 282 case *tcpip.AvailableCongestionControlOption: 283 p.mu.Lock() 284 *v = tcpip.AvailableCongestionControlOption(strings.Join(p.availableCongestionControl, " ")) 285 p.mu.Unlock() 286 return nil 287 288 case *tcpip.ModerateReceiveBufferOption: 289 p.mu.Lock() 290 *v = tcpip.ModerateReceiveBufferOption(p.moderateReceiveBuffer) 291 p.mu.Unlock() 292 return nil 293 294 case *tcpip.TCPLingerTimeoutOption: 295 p.mu.Lock() 296 *v = tcpip.TCPLingerTimeoutOption(p.tcpLingerTimeout) 297 p.mu.Unlock() 298 return nil 299 300 case *tcpip.TCPTimeWaitTimeoutOption: 301 p.mu.Lock() 302 *v = tcpip.TCPTimeWaitTimeoutOption(p.tcpTimeWaitTimeout) 303 p.mu.Unlock() 304 return nil 305 306 default: 307 return tcpip.ErrUnknownProtocolOption 308 } 309 } 310 311 // NewProtocol returns a TCP transport protocol. 312 func NewProtocol() stack.TransportProtocol { 313 return &protocol{ 314 sendBufferSize: SendBufferSizeOption{MinBufferSize, DefaultSendBufferSize, MaxBufferSize}, 315 recvBufferSize: ReceiveBufferSizeOption{MinBufferSize, DefaultReceiveBufferSize, MaxBufferSize}, 316 congestionControl: ccReno, 317 availableCongestionControl: []string{ccReno, ccCubic}, 318 tcpLingerTimeout: DefaultTCPLingerTimeout, 319 tcpTimeWaitTimeout: DefaultTCPTimeWaitTimeout, 320 } 321 }