gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/tcpip/transport/tcp/accept.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package tcp 16 17 import ( 18 "container/list" 19 "crypto/sha1" 20 "encoding/binary" 21 "fmt" 22 "hash" 23 "io" 24 "time" 25 26 "gvisor.dev/gvisor/pkg/sync" 27 "gvisor.dev/gvisor/pkg/tcpip" 28 "gvisor.dev/gvisor/pkg/tcpip/header" 29 "gvisor.dev/gvisor/pkg/tcpip/ports" 30 "gvisor.dev/gvisor/pkg/tcpip/seqnum" 31 "gvisor.dev/gvisor/pkg/tcpip/stack" 32 "gvisor.dev/gvisor/pkg/waiter" 33 ) 34 35 const ( 36 // tsLen is the length, in bits, of the timestamp in the SYN cookie. 37 tsLen = 8 38 39 // tsMask is a mask for timestamp values (i.e., tsLen bits). 40 tsMask = (1 << tsLen) - 1 41 42 // tsOffset is the offset, in bits, of the timestamp in the SYN cookie. 43 tsOffset = 24 44 45 // hashMask is the mask for hash values (i.e., tsOffset bits). 46 hashMask = (1 << tsOffset) - 1 47 48 // maxTSDiff is the maximum allowed difference between a received cookie 49 // timestamp and the current timestamp. If the difference is greater 50 // than maxTSDiff, the cookie is expired. 51 maxTSDiff = 2 52 ) 53 54 var ( 55 // mssTable is a slice containing the possible MSS values that we 56 // encode in the SYN cookie with two bits. 
57 mssTable = []uint16{536, 1300, 1440, 1460} 58 ) 59 60 func encodeMSS(mss uint16) uint32 { 61 for i := len(mssTable) - 1; i > 0; i-- { 62 if mss >= mssTable[i] { 63 return uint32(i) 64 } 65 } 66 return 0 67 } 68 69 // listenContext is used by a listening endpoint to store state used while 70 // listening for connections. This struct is allocated by the listen goroutine 71 // and must not be accessed or have its methods called concurrently as they 72 // may mutate the stored objects. 73 type listenContext struct { 74 stack *stack.Stack 75 protocol *protocol 76 77 // rcvWnd is the receive window that is sent by this listening context 78 // in the initial SYN-ACK. 79 rcvWnd seqnum.Size 80 81 // nonce are random bytes that are initialized once when the context 82 // is created and used to seed the hash function when generating 83 // the SYN cookie. 84 nonce [2][sha1.BlockSize]byte 85 86 // listenEP is a reference to the listening endpoint associated with 87 // this context. Can be nil if the context is created by the forwarder. 88 listenEP *Endpoint 89 90 // hasherMu protects hasher. 91 hasherMu sync.Mutex 92 // hasher is the hash function used to generate a SYN cookie. 93 hasher hash.Hash 94 95 // v6Only is true if listenEP is a dual stack socket and has the 96 // IPV6_V6ONLY option set. 97 v6Only bool 98 99 // netProto indicates the network protocol(IPv4/v6) for the listening 100 // endpoint. 101 netProto tcpip.NetworkProtocolNumber 102 } 103 104 // timeStamp returns an 8-bit timestamp with a granularity of 64 seconds. 105 func timeStamp(clock tcpip.Clock) uint32 { 106 return uint32(clock.NowMonotonic().Sub(tcpip.MonotonicTime{}).Seconds()) >> 6 & tsMask 107 } 108 109 // newListenContext creates a new listen context. 
110 func newListenContext(stk *stack.Stack, protocol *protocol, listenEP *Endpoint, rcvWnd seqnum.Size, v6Only bool, netProto tcpip.NetworkProtocolNumber) *listenContext { 111 l := &listenContext{ 112 stack: stk, 113 protocol: protocol, 114 rcvWnd: rcvWnd, 115 hasher: sha1.New(), 116 v6Only: v6Only, 117 netProto: netProto, 118 listenEP: listenEP, 119 } 120 121 for i := range l.nonce { 122 if _, err := io.ReadFull(stk.SecureRNG().Reader, l.nonce[i][:]); err != nil { 123 panic(err) 124 } 125 } 126 127 return l 128 } 129 130 // cookieHash calculates the cookieHash for the given id, timestamp and nonce 131 // index. The hash is used to create and validate cookies. 132 func (l *listenContext) cookieHash(id stack.TransportEndpointID, ts uint32, nonceIndex int) uint32 { 133 134 // Initialize block with fixed-size data: local ports and v. 135 var payload [8]byte 136 binary.BigEndian.PutUint16(payload[0:], id.LocalPort) 137 binary.BigEndian.PutUint16(payload[2:], id.RemotePort) 138 binary.BigEndian.PutUint32(payload[4:], ts) 139 140 // Feed everything to the hasher. 141 l.hasherMu.Lock() 142 l.hasher.Reset() 143 144 // Per hash.Hash.Writer: 145 // 146 // It never returns an error. 147 l.hasher.Write(payload[:]) 148 l.hasher.Write(l.nonce[nonceIndex][:]) 149 l.hasher.Write(id.LocalAddress.AsSlice()) 150 l.hasher.Write(id.RemoteAddress.AsSlice()) 151 152 // Finalize the calculation of the hash and return the first 4 bytes. 153 h := l.hasher.Sum(nil) 154 l.hasherMu.Unlock() 155 156 return binary.BigEndian.Uint32(h[:]) 157 } 158 159 // createCookie creates a SYN cookie for the given id and incoming sequence 160 // number. 
161 func (l *listenContext) createCookie(id stack.TransportEndpointID, seq seqnum.Value, data uint32) seqnum.Value { 162 ts := timeStamp(l.stack.Clock()) 163 v := l.cookieHash(id, 0, 0) + uint32(seq) + (ts << tsOffset) 164 v += (l.cookieHash(id, ts, 1) + data) & hashMask 165 return seqnum.Value(v) 166 } 167 168 // isCookieValid checks if the supplied cookie is valid for the given id and 169 // sequence number. If it is, it also returns the data originally encoded in the 170 // cookie when createCookie was called. 171 func (l *listenContext) isCookieValid(id stack.TransportEndpointID, cookie seqnum.Value, seq seqnum.Value) (uint32, bool) { 172 ts := timeStamp(l.stack.Clock()) 173 v := uint32(cookie) - l.cookieHash(id, 0, 0) - uint32(seq) 174 cookieTS := v >> tsOffset 175 if ((ts - cookieTS) & tsMask) > maxTSDiff { 176 return 0, false 177 } 178 179 return (v - l.cookieHash(id, cookieTS, 1)) & hashMask, true 180 } 181 182 // createConnectingEndpoint creates a new endpoint in a connecting state, with 183 // the connection parameters given by the arguments. The newly created endpoint 184 // will be locked. 185 // +checklocksacquire:n.mu 186 func (l *listenContext) createConnectingEndpoint(s *segment, rcvdSynOpts header.TCPSynOptions, queue *waiter.Queue) (n *Endpoint, _ tcpip.Error) { 187 // Create a new endpoint. 
188 netProto := l.netProto 189 if netProto == 0 { 190 netProto = s.pkt.NetworkProtocolNumber 191 } 192 193 route, err := l.stack.FindRoute(s.pkt.NICID, s.pkt.Network().DestinationAddress(), s.pkt.Network().SourceAddress(), s.pkt.NetworkProtocolNumber, false /* multicastLoop */) 194 if err != nil { 195 return nil, err // +checklocksignore 196 } 197 198 n = newEndpoint(l.stack, l.protocol, netProto, queue) 199 n.mu.Lock() 200 n.ops.SetV6Only(l.v6Only) 201 n.TransportEndpointInfo.ID = s.id 202 n.boundNICID = s.pkt.NICID 203 n.route = route 204 n.effectiveNetProtos = []tcpip.NetworkProtocolNumber{s.pkt.NetworkProtocolNumber} 205 n.ops.SetReceiveBufferSize(int64(l.rcvWnd), false /* notify */) 206 n.amss = calculateAdvertisedMSS(n.userMSS, n.route) 207 n.setEndpointState(StateConnecting) 208 209 n.maybeEnableTimestamp(rcvdSynOpts) 210 n.maybeEnableSACKPermitted(rcvdSynOpts) 211 212 n.initGSO() 213 214 // Bootstrap the auto tuning algorithm. Starting at zero will result in 215 // a large step function on the first window adjustment causing the 216 // window to grow to a really large value. 217 initWnd := n.initialReceiveWindow() 218 n.rcvQueueMu.Lock() 219 n.RcvAutoParams.PrevCopiedBytes = initWnd 220 n.rcvQueueMu.Unlock() 221 222 return n, nil 223 } 224 225 // startHandshake creates a new endpoint in connecting state and then sends 226 // the SYN-ACK for the TCP 3-way handshake. It returns the state of the 227 // handshake in progress, which includes the new endpoint in the SYN-RCVD 228 // state. 229 // 230 // On success, a handshake h is returned. 231 // 232 // NOTE: h.ep.mu is not held and must be acquired if any state needs to be 233 // modified. 234 // 235 // Precondition: if l.listenEP != nil, l.listenEP.mu must be locked. 236 func (l *listenContext) startHandshake(s *segment, opts header.TCPSynOptions, queue *waiter.Queue, owner tcpip.PacketOwner) (h *handshake, _ tcpip.Error) { 237 // Create new endpoint. 
238 irs := s.sequenceNumber 239 isn := generateSecureISN(s.id, l.stack.Clock(), l.protocol.seqnumSecret) 240 ep, err := l.createConnectingEndpoint(s, opts, queue) 241 if err != nil { 242 return nil, err // +checklocksignore 243 } 244 245 ep.owner = owner 246 247 // listenEP is nil when listenContext is used by tcp.Forwarder. 248 deferAccept := time.Duration(0) 249 if l.listenEP != nil { 250 if l.listenEP.EndpointState() != StateListen { 251 252 // Ensure we release any registrations done by the newly 253 // created endpoint. 254 ep.mu.Unlock() 255 ep.Close() 256 257 return nil, &tcpip.ErrConnectionAborted{} // +checklocksignore 258 } 259 260 // Propagate any inheritable options from the listening endpoint 261 // to the newly created endpoint. 262 l.listenEP.propagateInheritableOptionsLocked(ep) // +checklocksforce 263 264 if !ep.reserveTupleLocked() { 265 ep.mu.Unlock() 266 ep.Close() 267 268 return nil, &tcpip.ErrConnectionAborted{} // +checklocksignore 269 } 270 271 deferAccept = l.listenEP.deferAccept 272 } 273 274 // Register new endpoint so that packets are routed to it. 275 if err := ep.stack.RegisterTransportEndpoint( 276 ep.effectiveNetProtos, 277 ProtocolNumber, 278 ep.TransportEndpointInfo.ID, 279 ep, 280 ep.boundPortFlags, 281 ep.boundBindToDevice, 282 ); err != nil { 283 ep.mu.Unlock() 284 ep.Close() 285 286 ep.drainClosingSegmentQueue() 287 288 return nil, err // +checklocksignore 289 } 290 291 ep.isRegistered = true 292 293 // Initialize and start the handshake. 294 h = ep.newPassiveHandshake(isn, irs, opts, deferAccept) 295 h.listenEP = l.listenEP 296 h.start() 297 h.ep.mu.Unlock() 298 return h, nil 299 } 300 301 // performHandshake performs a TCP 3-way handshake. On success, the new 302 // established endpoint is returned. 303 // 304 // Precondition: if l.listenEP != nil, l.listenEP.mu must be locked. 
305 func (l *listenContext) performHandshake(s *segment, opts header.TCPSynOptions, queue *waiter.Queue, owner tcpip.PacketOwner) (*Endpoint, tcpip.Error) { 306 waitEntry, notifyCh := waiter.NewChannelEntry(waiter.WritableEvents) 307 queue.EventRegister(&waitEntry) 308 defer queue.EventUnregister(&waitEntry) 309 310 h, err := l.startHandshake(s, opts, queue, owner) 311 if err != nil { 312 return nil, err 313 } 314 315 // performHandshake is used by the Forwarder which will block till the 316 // handshake either succeeds or fails. We do this by registering for 317 // events above and block on the notification channel. 318 <-notifyCh 319 320 ep := h.ep 321 ep.mu.Lock() 322 if !ep.EndpointState().connected() { 323 ep.stack.Stats().TCP.FailedConnectionAttempts.Increment() 324 ep.stats.FailedConnectionAttempts.Increment() 325 ep.h = nil 326 ep.mu.Unlock() 327 ep.Close() 328 ep.notifyAborted() 329 ep.drainClosingSegmentQueue() 330 err := ep.LastError() 331 if err == nil { 332 // If err was nil then return the best error we can to indicate 333 // a connection failure. 334 err = &tcpip.ErrConnectionAborted{} 335 } 336 return nil, err 337 } 338 339 ep.isConnectNotified = true 340 341 // Transfer any state from the completed handshake to the endpoint. 342 // 343 // Update the receive window scaling. We can't do it before the 344 // handshake because it's possible that the peer doesn't support window 345 // scaling. 346 ep.rcv.RcvWndScale = ep.h.effectiveRcvWndScale() 347 348 // Clean up handshake state stored in the endpoint so that it can be 349 // GCed. 350 ep.h = nil 351 ep.mu.Unlock() 352 return ep, nil 353 } 354 355 // propagateInheritableOptionsLocked propagates any options set on the listening 356 // endpoint to the newly created endpoint. 
357 // 358 // +checklocks:e.mu 359 // +checklocks:n.mu 360 func (e *Endpoint) propagateInheritableOptionsLocked(n *Endpoint) { 361 n.userTimeout = e.userTimeout 362 n.portFlags = e.portFlags 363 n.boundBindToDevice = e.boundBindToDevice 364 n.boundPortFlags = e.boundPortFlags 365 n.userMSS = e.userMSS 366 } 367 368 // reserveTupleLocked reserves an accepted endpoint's tuple. 369 // 370 // Precondition: e.propagateInheritableOptionsLocked has been called. 371 // 372 // +checklocks:e.mu 373 func (e *Endpoint) reserveTupleLocked() bool { 374 dest := tcpip.FullAddress{ 375 Addr: e.TransportEndpointInfo.ID.RemoteAddress, 376 Port: e.TransportEndpointInfo.ID.RemotePort, 377 } 378 portRes := ports.Reservation{ 379 Networks: e.effectiveNetProtos, 380 Transport: ProtocolNumber, 381 Addr: e.TransportEndpointInfo.ID.LocalAddress, 382 Port: e.TransportEndpointInfo.ID.LocalPort, 383 Flags: e.boundPortFlags, 384 BindToDevice: e.boundBindToDevice, 385 Dest: dest, 386 } 387 if !e.stack.ReserveTuple(portRes) { 388 e.stack.Stats().TCP.FailedPortReservations.Increment() 389 return false 390 } 391 392 e.isPortReserved = true 393 e.boundDest = dest 394 return true 395 } 396 397 // notifyAborted wakes up any waiters on registered, but not accepted 398 // endpoints. 399 // 400 // This is strictly not required normally as a socket that was never accepted 401 // can't really have any registered waiters except when stack.Wait() is called 402 // which waits for all registered endpoints to stop and expects an EventHUp. 
func (e *Endpoint) notifyAborted() {
	// Signal hang-up, error, readable and writable events to everything
	// waiting on this endpoint's waiter queue.
	e.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.ReadableEvents | waiter.WritableEvents)
}

// acceptQueueIsFull reports whether e's accept queue is full. It acquires
// e.acceptMu for the duration of the check.
func (e *Endpoint) acceptQueueIsFull() bool {
	e.acceptMu.Lock()
	full := e.acceptQueue.isFull()
	e.acceptMu.Unlock()
	return full
}

// acceptQueue holds the endpoints that are ready to be accepted, together
// with the set of endpoints whose handshake is still in progress.
//
// +stateify savable
type acceptQueue struct {
	// NB: this could be an endpointList, but ilist only permits endpoints to
	// belong to one list at a time, and endpoints are already stored in the
	// dispatcher's list.
	endpoints list.List `state:".([]*Endpoint)"`

	// pendingEndpoints is a set of all endpoints for which a handshake is
	// in progress.
	pendingEndpoints map[*Endpoint]struct{}

	// capacity is the maximum number of endpoints that can be in endpoints.
	capacity int
}

// isFull reports whether the number of queued endpoints has reached the
// queue's capacity. It takes no lock itself; the callers in this file hold
// the owning endpoint's acceptMu.
func (a *acceptQueue) isFull() bool {
	return a.endpoints.Len() >= a.capacity
}

// handleListenSegment is called when a listening endpoint receives a segment
// and needs to handle it.
//
// Segments are handled according to their flags:
//   - If receiving is closed on the endpoint, or the segment is a SYN-ACK, a
//     reset is sent in reply.
//   - RST segments are dropped.
//   - SYN segments either start a regular handshake or, when SYN cookies are
//     in use, are answered with a SYN-ACK carrying a cookie. They are dropped
//     when the accept queue is full.
//   - ACK segments are forwarded to an already connected endpoint matching
//     the segment's id if there is one; otherwise the SYN cookie is validated
//     and, if valid, a new established endpoint is added to the accept queue.
//   - Anything else is dropped.
//
// +checklocks:e.mu
func (e *Endpoint) handleListenSegment(ctx *listenContext, s *segment) tcpip.Error {
	// Take a snapshot of RcvClosed under rcvQueueMu.
	e.rcvQueueMu.Lock()
	rcvClosed := e.RcvClosed
	e.rcvQueueMu.Unlock()
	if rcvClosed || s.flags.Contains(header.TCPFlagSyn|header.TCPFlagAck) {
		// If the endpoint is shutdown, reply with reset.
		//
		// RFC 793 section 3.4 page 35 (figure 12) outlines that a RST
		// must be sent in response to a SYN-ACK while in the listen
		// state to prevent completing a handshake from an old SYN.
		return replyWithReset(e.stack, s, e.sendTOS, e.ipv4TTL, e.ipv6HopLimit)
	}

	switch {
	case s.flags.Contains(header.TCPFlagRst):
		e.stack.Stats().DroppedPackets.Increment()
		return nil

	case s.flags.Contains(header.TCPFlagSyn):
		// Drop the SYN when there is no room left in the accept queue.
		if e.acceptQueueIsFull() {
			e.stack.Stats().TCP.ListenOverflowSynDrop.Increment()
			e.stats.ReceiveErrors.ListenOverflowSynDrop.Increment()
			e.stack.Stats().DroppedPackets.Increment()
			return nil
		}

		opts := parseSynSegmentOptions(s)

		// Decide whether to answer with a SYN cookie. When cookies are
		// not needed, the closure starts a regular handshake and records
		// the new endpoint as pending. acceptMu is held from the pending
		// count check until the closure returns.
		useSynCookies, err := func() (bool, tcpip.Error) {
			var alwaysUseSynCookies tcpip.TCPAlwaysUseSynCookies
			if err := e.stack.TransportProtocolOption(header.TCPProtocolNumber, &alwaysUseSynCookies); err != nil {
				panic(fmt.Sprintf("TransportProtocolOption(%d, %T) = %s", header.TCPProtocolNumber, alwaysUseSynCookies, err))
			}
			if alwaysUseSynCookies {
				return true, nil
			}
			e.acceptMu.Lock()
			defer e.acceptMu.Unlock()

			// The capacity of the accepted queue would always be one greater than the
			// listen backlog. But, the SYNRCVD connections count is always checked
			// against the listen backlog value for Linux parity reason.
			// https://github.com/torvalds/linux/blob/7acac4b3196/include/net/inet_connection_sock.h#L280
			if len(e.acceptQueue.pendingEndpoints) == e.acceptQueue.capacity-1 {
				return true, nil
			}

			h, err := ctx.startHandshake(s, opts, &waiter.Queue{}, e.owner)
			if err != nil {
				e.stack.Stats().TCP.FailedConnectionAttempts.Increment()
				e.stats.FailedConnectionAttempts.Increment()
				return false, err
			}
			e.acceptQueue.pendingEndpoints[h.ep] = struct{}{}

			return false, nil
		}()
		if err != nil {
			return err
		}
		if !useSynCookies {
			// A regular handshake was started above; nothing more to do.
			return nil
		}

		net := s.pkt.Network()
		route, err := e.stack.FindRoute(s.pkt.NICID, net.DestinationAddress(), net.SourceAddress(), s.pkt.NetworkProtocolNumber, false /* multicastLoop */)
		if err != nil {
			return err
		}
		defer route.Release()

		// Send SYN-ACK without window scaling because we currently
		// don't encode this information in the cookie.
		//
		// Enable Timestamp option if the original syn did have
		// the timestamp option specified.
		//
		// Use the user supplied MSS on the listening socket for
		// new connections, if available.
		synOpts := header.TCPSynOptions{
			WS:    -1,
			TS:    opts.TS,
			TSEcr: opts.TSVal,
			MSS:   calculateAdvertisedMSS(e.userMSS, route),
		}
		if opts.TS {
			offset := e.protocol.tsOffset(net.DestinationAddress(), net.SourceAddress())
			now := e.stack.Clock().NowMonotonic()
			synOpts.TSVal = offset.TSVal(now)
		}
		// The cookie is sent as our initial sequence number and carries
		// the encoded MSS of the peer's SYN.
		cookie := ctx.createCookie(s.id, s.sequenceNumber, encodeMSS(opts.MSS))
		fields := tcpFields{
			id:     s.id,
			ttl:    calculateTTL(route, e.ipv4TTL, e.ipv6HopLimit),
			tos:    e.sendTOS,
			flags:  header.TCPFlagSyn | header.TCPFlagAck,
			seq:    cookie,
			ack:    s.sequenceNumber + 1,
			rcvWnd: ctx.rcvWnd,
		}
		if err := e.sendSynTCP(route, fields, synOpts); err != nil {
			return err
		}
		e.stack.Stats().TCP.ListenOverflowSynCookieSent.Increment()
		return nil

	case s.flags.Contains(header.TCPFlagAck):
		// Recover the initial sequence numbers of both sides from the
		// ACK: iss is what we would have sent (the cookie), irs is what
		// the peer sent in its SYN.
		iss := s.ackNumber - 1
		irs := s.sequenceNumber - 1

		// As an edge case when SYN-COOKIES are in use and we receive a
		// segment that has data and is valid we should check if it
		// already matches a created endpoint and redirect the segment
		// rather than try and create a new endpoint. This can happen
		// where the final ACK for the handshake and other data packets
		// arrive at the same time and are queued to the listening
		// endpoint before the listening endpoint has had time to
		// process the first ACK and create the endpoint that matches
		// the incoming packet's full 5 tuple.
		netProtos := []tcpip.NetworkProtocolNumber{s.pkt.NetworkProtocolNumber}
		// If the local address is an IPv4 Address then also look for IPv6
		// dual stack endpoints.
		if s.id.LocalAddress.To4() != (tcpip.Address{}) {
			netProtos = []tcpip.NetworkProtocolNumber{header.IPv4ProtocolNumber, header.IPv6ProtocolNumber}
		}
		for _, netProto := range netProtos {
			if newEP := e.stack.FindTransportEndpoint(netProto, ProtocolNumber, s.id, s.pkt.NICID); newEP != nil && newEP != e {
				tcpEP := newEP.(*Endpoint)
				if !tcpEP.EndpointState().connected() {
					continue
				}
				if !tcpEP.enqueueSegment(s) {
					// Just silently drop the segment as we failed
					// to queue, we don't want to generate a RST
					// further below or try and create a new
					// endpoint etc.
					return nil
				}
				tcpEP.notifyProcessor()
				return nil
			}
		}

		// Since SYN cookies are in use this is potentially an ACK to a
		// SYN-ACK we sent but don't have a half open connection state
		// as cookies are being used to protect against a potential SYN
		// flood. In such cases validate the cookie and if valid create
		// a fully connected endpoint and deliver to the accept queue.
		//
		// If not, silently drop the ACK to avoid leaking information
		// when under a potential syn flood attack.
		//
		// NOTE(review): the code below replies with a reset when the
		// cookie is invalid rather than dropping the ACK silently;
		// confirm which behavior is intended.
		//
		// Validate the cookie.
		data, ok := ctx.isCookieValid(s.id, iss, irs)
		if !ok || int(data) >= len(mssTable) {
			e.stack.Stats().TCP.ListenOverflowInvalidSynCookieRcvd.Increment()
			e.stack.Stats().DroppedPackets.Increment()

			// When not using SYN cookies, as per RFC 793, section 3.9, page 64:
			// Any acknowledgment is bad if it arrives on a connection still in
			// the LISTEN state.  An acceptable reset segment should be formed
			// for any arriving ACK-bearing segment.  The RST should be
			// formatted as follows:
			//
			//  <SEQ=SEG.ACK><CTL=RST>
			//
			// Send a reset as this is an ACK for which there is no
			// half open connections and we are not using cookies
			// yet.
			//
			// The only time we should reach here is when a connection
			// was opened and closed really quickly and a delayed
			// ACK was received from the sender.
			return replyWithReset(e.stack, s, e.sendTOS, e.ipv4TTL, e.ipv6HopLimit)
		}

		// Keep hold of acceptMu until the new endpoint is in the accept queue (or
		// if there is an error), to guarantee that we will keep our spot in the
		// queue even if another handshake from the syn queue completes.
		e.acceptMu.Lock()
		if e.acceptQueue.isFull() {
			// Silently drop the ack as the application can't accept
			// the connection at this point. The ack will be
			// retransmitted by the sender anyway and we can
			// complete the connection at the time of retransmit if
			// the backlog has space.
			e.acceptMu.Unlock()
			e.stack.Stats().TCP.ListenOverflowAckDrop.Increment()
			e.stats.ReceiveErrors.ListenOverflowAckDrop.Increment()
			e.stack.Stats().DroppedPackets.Increment()
			return nil
		}

		e.stack.Stats().TCP.ListenOverflowSynCookieRcvd.Increment()
		// Create newly accepted endpoint and deliver it.
		rcvdSynOptions := header.TCPSynOptions{
			// data was bounds-checked against len(mssTable) above.
			MSS: mssTable[data],
			// Disable Window scaling as original SYN is
			// lost.
			WS: -1,
		}

		// When syn cookies are in use we enable timestamp only
		// if the ack specifies the timestamp option assuming
		// that the other end did in fact negotiate the
		// timestamp option in the original SYN.
		if s.parsedOptions.TS {
			rcvdSynOptions.TS = true
			rcvdSynOptions.TSVal = s.parsedOptions.TSVal
			rcvdSynOptions.TSEcr = s.parsedOptions.TSEcr
		}

		// On success n is returned locked; every exit path below
		// releases n.mu and then e.acceptMu.
		n, err := ctx.createConnectingEndpoint(s, rcvdSynOptions, &waiter.Queue{})
		if err != nil {
			e.acceptMu.Unlock()
			return err
		}

		// Propagate any inheritable options from the listening endpoint
		// to the newly created endpoint.
		e.propagateInheritableOptionsLocked(n)

		if !n.reserveTupleLocked() {
			n.mu.Unlock()
			e.acceptMu.Unlock()
			n.Close()

			e.stack.Stats().TCP.FailedConnectionAttempts.Increment()
			e.stats.FailedConnectionAttempts.Increment()
			return nil
		}

		// Register new endpoint so that packets are routed to it.
		if err := n.stack.RegisterTransportEndpoint(
			n.effectiveNetProtos,
			ProtocolNumber,
			n.TransportEndpointInfo.ID,
			n,
			n.boundPortFlags,
			n.boundBindToDevice,
		); err != nil {
			n.mu.Unlock()
			e.acceptMu.Unlock()
			n.Close()

			e.stack.Stats().TCP.FailedConnectionAttempts.Increment()
			e.stats.FailedConnectionAttempts.Increment()
			return err
		}

		n.isRegistered = true
		net := s.pkt.Network()
		n.TSOffset = n.protocol.tsOffset(net.DestinationAddress(), net.SourceAddress())

		// Switch state to connected.
		n.isConnectNotified = true
		// Build the handshake state directly from the values recovered
		// from the ACK and the cookie.
		h := handshake{
			ep:                  n,
			iss:                 iss,
			ackNum:              irs + 1,
			rcvWnd:              seqnum.Size(n.initialReceiveWindow()),
			sndWnd:              s.window,
			rcvWndScale:         e.rcvWndScaleForHandshake(),
			sndWndScale:         rcvdSynOptions.WS,
			mss:                 rcvdSynOptions.MSS,
			sampleRTTWithTSOnly: true,
		}
		h.ep.AssertLockHeld(n)
		h.transitionToStateEstablishedLocked(s)
		n.mu.Unlock()

		// Requeue the segment if the ACK completing the handshake has more info
		// to be processed by the newly established endpoint.
		if (s.flags.Contains(header.TCPFlagFin) || s.payloadSize() > 0) && n.enqueueSegment(s) {
			n.notifyProcessor()
		}

		e.stack.Stats().TCP.PassiveConnectionOpenings.Increment()

		// Deliver the endpoint to the accept queue.
		e.acceptQueue.endpoints.PushBack(n)
		e.acceptMu.Unlock()

		// Notify waiters on the listening endpoint that it is readable.
		e.waiterQueue.Notify(waiter.ReadableEvents)
		return nil

	default:
		e.stack.Stats().DroppedPackets.Increment()
		return nil
	}
}