github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/tcpip/transport/tcp/rcv.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package tcp 16 17 import ( 18 "container/heap" 19 "math" 20 21 "github.com/MerlinKodo/gvisor/pkg/tcpip" 22 "github.com/MerlinKodo/gvisor/pkg/tcpip/header" 23 "github.com/MerlinKodo/gvisor/pkg/tcpip/seqnum" 24 "github.com/MerlinKodo/gvisor/pkg/tcpip/stack" 25 ) 26 27 // receiver holds the state necessary to receive TCP segments and turn them 28 // into a stream of bytes. 29 // 30 // +stateify savable 31 type receiver struct { 32 stack.TCPReceiverState 33 ep *endpoint 34 35 // rcvWnd is the non-scaled receive window last advertised to the peer. 36 rcvWnd seqnum.Size 37 38 // rcvWUP is the RcvNxt value at the last window update sent. 39 rcvWUP seqnum.Value 40 41 // prevBufused is the snapshot of endpoint rcvBufUsed taken when we 42 // advertise a receive window. 43 prevBufUsed int 44 45 closed bool 46 47 // pendingRcvdSegments is bounded by the receive buffer size of the 48 // endpoint. 49 pendingRcvdSegments segmentHeap 50 51 // Time when the last ack was received. 52 lastRcvdAckTime tcpip.MonotonicTime 53 } 54 55 func newReceiver(ep *endpoint, irs seqnum.Value, rcvWnd seqnum.Size, rcvWndScale uint8) *receiver { 56 return &receiver{ 57 ep: ep, 58 TCPReceiverState: stack.TCPReceiverState{ 59 RcvNxt: irs + 1, 60 RcvAcc: irs.Add(rcvWnd + 1), 61 RcvWndScale: rcvWndScale, 62 }, 63 rcvWnd: rcvWnd, 64 rcvWUP: irs + 1, 65 lastRcvdAckTime: ep.stack.Clock().NowMonotonic(), 66 } 67 } 68 69 // acceptable checks if the segment sequence number range is acceptable 70 // according to the table on page 26 of RFC 793. 71 func (r *receiver) acceptable(segSeq seqnum.Value, segLen seqnum.Size) bool { 72 // r.rcvWnd could be much larger than the window size we advertised in our 73 // outgoing packets, we should use what we have advertised for acceptability 74 // test. 75 scaledWindowSize := r.rcvWnd >> r.RcvWndScale 76 if scaledWindowSize > math.MaxUint16 { 77 // This is what we actually put in the Window field. 78 scaledWindowSize = math.MaxUint16 79 } 80 advertisedWindowSize := scaledWindowSize << r.RcvWndScale 81 return header.Acceptable(segSeq, segLen, r.RcvNxt, r.RcvNxt.Add(advertisedWindowSize)) 82 } 83 84 // currentWindow returns the available space in the window that was advertised 85 // last to our peer. 86 func (r *receiver) currentWindow() (curWnd seqnum.Size) { 87 endOfWnd := r.rcvWUP.Add(r.rcvWnd) 88 if endOfWnd.LessThan(r.RcvNxt) { 89 // return 0 if r.RcvNxt is past the end of the previously advertised window. 90 // This can happen because we accept a large segment completely even if 91 // accepting it causes it to partially exceed the advertised window. 92 return 0 93 } 94 return r.RcvNxt.Size(endOfWnd) 95 } 96 97 // getSendParams returns the parameters needed by the sender when building 98 // segments to send. 99 // +checklocks:r.ep.mu 100 func (r *receiver) getSendParams() (RcvNxt seqnum.Value, rcvWnd seqnum.Size) { 101 newWnd := r.ep.selectWindow() 102 curWnd := r.currentWindow() 103 unackLen := int(r.ep.snd.MaxSentAck.Size(r.RcvNxt)) 104 bufUsed := r.ep.receiveBufferUsed() 105 106 // Grow the right edge of the window only for payloads larger than the 107 // the segment overhead OR if the application is actively consuming data. 108 // 109 // Avoiding growing the right edge otherwise, addresses a situation below: 110 // An application has been slow in reading data and we have burst of 111 // incoming segments lengths < segment overhead. Here, our available free 112 // memory would reduce drastically when compared to the advertised receive 113 // window. 114 // 115 // For example: With incoming 512 bytes segments, segment overhead of 116 // 552 bytes (at the time of writing this comment), with receive window 117 // starting from 1MB and with rcvAdvWndScale being 1, buffer would reach 0 118 // when the curWnd is still 19436 bytes, because for every incoming segment 119 // newWnd would reduce by (552+512) >> rcvAdvWndScale (current value 1), 120 // while curWnd would reduce by 512 bytes. 121 // Such a situation causes us to keep tail dropping the incoming segments 122 // and never advertise zero receive window to the peer. 123 // 124 // Linux does a similar check for minimal sk_buff size (128): 125 // https://github.com/torvalds/linux/blob/d5beb3140f91b1c8a3d41b14d729aefa4dcc58bc/net/ipv4/tcp_input.c#L783 126 // 127 // Also, if the application is reading the data, we keep growing the right 128 // edge, as we are still advertising a window that we think can be serviced. 129 toGrow := unackLen >= SegOverheadSize || bufUsed <= r.prevBufUsed 130 131 // Update RcvAcc only if new window is > previously advertised window. We 132 // should never shrink the acceptable sequence space once it has been 133 // advertised the peer. If we shrink the acceptable sequence space then we 134 // would end up dropping bytes that might already be in flight. 135 // ==================================================== sequence space. 136 // ^ ^ ^ ^ 137 // rcvWUP RcvNxt RcvAcc new RcvAcc 138 // <=====curWnd ===> 139 // <========= newWnd > curWnd ========= > 140 if r.RcvNxt.Add(curWnd).LessThan(r.RcvNxt.Add(newWnd)) && toGrow { 141 // If the new window moves the right edge, then update RcvAcc. 142 r.RcvAcc = r.RcvNxt.Add(newWnd) 143 } else { 144 if newWnd == 0 { 145 // newWnd is zero but we can't advertise a zero as it would cause window 146 // to shrink so just increment a metric to record this event. 147 r.ep.stats.ReceiveErrors.WantZeroRcvWindow.Increment() 148 } 149 newWnd = curWnd 150 } 151 152 // Apply silly-window avoidance when recovering from zero-window situation. 153 // Keep advertising zero receive window up until the new window reaches a 154 // threshold. 155 if r.rcvWnd == 0 && newWnd != 0 { 156 r.ep.rcvQueueMu.Lock() 157 if crossed, above := r.ep.windowCrossedACKThresholdLocked(int(newWnd), int(r.ep.ops.GetReceiveBufferSize())); !crossed && !above { 158 newWnd = 0 159 } 160 r.ep.rcvQueueMu.Unlock() 161 } 162 163 // Stash away the non-scaled receive window as we use it for measuring 164 // receiver's estimated RTT. 165 r.rcvWnd = newWnd 166 r.rcvWUP = r.RcvNxt 167 r.prevBufUsed = bufUsed 168 scaledWnd := r.rcvWnd >> r.RcvWndScale 169 if scaledWnd == 0 { 170 // Increment a metric if we are advertising an actual zero window. 171 r.ep.stats.ReceiveErrors.ZeroRcvWindowState.Increment() 172 } 173 174 // If we started off with a window larger than what can he held in 175 // the 16bit window field, we ceil the value to the max value. 176 if scaledWnd > math.MaxUint16 { 177 scaledWnd = seqnum.Size(math.MaxUint16) 178 179 // Ensure that the stashed receive window always reflects what 180 // is being advertised. 181 r.rcvWnd = scaledWnd << r.RcvWndScale 182 } 183 return r.RcvNxt, scaledWnd 184 } 185 186 // nonZeroWindow is called when the receive window grows from zero to nonzero; 187 // in such cases we may need to send an ack to indicate to our peer that it can 188 // resume sending data. 189 // +checklocks:r.ep.mu 190 // +checklocksalias:r.ep.snd.ep.mu=r.ep.mu 191 func (r *receiver) nonZeroWindow() { 192 // Immediately send an ack. 193 r.ep.snd.sendAck() 194 } 195 196 // consumeSegment attempts to consume a segment that was received by r. The 197 // segment may have just been received or may have been received earlier but 198 // wasn't ready to be consumed then. 199 // 200 // Returns true if the segment was consumed, false if it cannot be consumed 201 // yet because of a missing segment. 202 // +checklocks:r.ep.mu 203 // +checklocksalias:r.ep.snd.ep.mu=r.ep.mu 204 func (r *receiver) consumeSegment(s *segment, segSeq seqnum.Value, segLen seqnum.Size) bool { 205 if segLen > 0 { 206 // If the segment doesn't include the seqnum we're expecting to 207 // consume now, we're missing a segment. We cannot proceed until 208 // we receive that segment though. 209 if !r.RcvNxt.InWindow(segSeq, segLen) { 210 return false 211 } 212 213 // Trim segment to eliminate already acknowledged data. 214 if segSeq.LessThan(r.RcvNxt) { 215 diff := segSeq.Size(r.RcvNxt) 216 segLen -= diff 217 segSeq.UpdateForward(diff) 218 s.sequenceNumber.UpdateForward(diff) 219 s.TrimFront(diff) 220 } 221 222 // Move segment to ready-to-deliver list. Wakeup any waiters. 223 r.ep.readyToRead(s) 224 225 } else if segSeq != r.RcvNxt { 226 return false 227 } 228 229 // Update the segment that we're expecting to consume. 230 r.RcvNxt = segSeq.Add(segLen) 231 232 // In cases of a misbehaving sender which could send more than the 233 // advertised window, we could end up in a situation where we get a 234 // segment that exceeds the window advertised. Instead of partially 235 // accepting the segment and discarding bytes beyond the advertised 236 // window, we accept the whole segment and make sure r.RcvAcc is moved 237 // forward to match r.RcvNxt to indicate that the window is now closed. 238 // 239 // In absence of this check the r.acceptable() check fails and accepts 240 // segments that should be dropped because rcvWnd is calculated as 241 // the size of the interval (RcvNxt, RcvAcc] which becomes extremely 242 // large if RcvAcc is ever less than RcvNxt. 243 if r.RcvAcc.LessThan(r.RcvNxt) { 244 r.RcvAcc = r.RcvNxt 245 } 246 247 // Trim SACK Blocks to remove any SACK information that covers 248 // sequence numbers that have been consumed. 249 TrimSACKBlockList(&r.ep.sack, r.RcvNxt) 250 251 // Handle FIN or FIN-ACK. 252 if s.flags.Contains(header.TCPFlagFin) { 253 r.RcvNxt++ 254 255 // Send ACK immediately. 256 r.ep.snd.sendAck() 257 258 // Tell any readers that no more data will come. 259 r.closed = true 260 r.ep.readyToRead(nil) 261 262 // We just received a FIN, our next state depends on whether we sent a 263 // FIN already or not. 264 switch r.ep.EndpointState() { 265 case StateEstablished: 266 r.ep.setEndpointState(StateCloseWait) 267 case StateFinWait1: 268 if s.flags.Contains(header.TCPFlagAck) && s.ackNumber == r.ep.snd.SndNxt { 269 // FIN-ACK, transition to TIME-WAIT. 270 r.ep.setEndpointState(StateTimeWait) 271 } else { 272 // Simultaneous close, expecting a final ACK. 273 r.ep.setEndpointState(StateClosing) 274 } 275 case StateFinWait2: 276 r.ep.setEndpointState(StateTimeWait) 277 } 278 279 // Flush out any pending segments, except the very first one if 280 // it happens to be the one we're handling now because the 281 // caller is using it. 282 first := 0 283 if len(r.pendingRcvdSegments) != 0 && r.pendingRcvdSegments[0] == s { 284 first = 1 285 } 286 287 for i := first; i < len(r.pendingRcvdSegments); i++ { 288 r.PendingBufUsed -= r.pendingRcvdSegments[i].segMemSize() 289 r.pendingRcvdSegments[i].DecRef() 290 // Note that slice truncation does not allow garbage 291 // collection of truncated items, thus truncated items 292 // must be set to nil to avoid memory leaks. 293 r.pendingRcvdSegments[i] = nil 294 } 295 r.pendingRcvdSegments = r.pendingRcvdSegments[:first] 296 r.ep.updateConnDirectionState(connDirectionStateRcvClosed) 297 298 return true 299 } 300 301 // Handle ACK (not FIN-ACK, which we handled above) during one of the 302 // shutdown states. 303 if s.flags.Contains(header.TCPFlagAck) && s.ackNumber == r.ep.snd.SndNxt { 304 switch r.ep.EndpointState() { 305 case StateFinWait1: 306 r.ep.setEndpointState(StateFinWait2) 307 if e := r.ep; e.closed { 308 // The socket has been closed and we are in 309 // FIN-WAIT-2 so start the FIN-WAIT-2 timer. 310 e.finWait2Timer = e.stack.Clock().AfterFunc(e.tcpLingerTimeout, e.finWait2TimerExpired) 311 } 312 313 case StateClosing: 314 r.ep.setEndpointState(StateTimeWait) 315 case StateLastAck: 316 r.ep.transitionToStateCloseLocked() 317 } 318 } 319 320 return true 321 } 322 323 // updateRTT updates the receiver RTT measurement based on the sequence number 324 // of the received segment. 325 func (r *receiver) updateRTT() { 326 // From: https://public.lanl.gov/radiant/pubs/drs/sc2001-poster.pdf 327 // 328 // A system that is only transmitting acknowledgements can still 329 // estimate the round-trip time by observing the time between when a byte 330 // is first acknowledged and the receipt of data that is at least one 331 // window beyond the sequence number that was acknowledged. 332 r.ep.rcvQueueMu.Lock() 333 if r.ep.RcvAutoParams.RTTMeasureTime == (tcpip.MonotonicTime{}) { 334 // New measurement. 335 r.ep.RcvAutoParams.RTTMeasureTime = r.ep.stack.Clock().NowMonotonic() 336 r.ep.RcvAutoParams.RTTMeasureSeqNumber = r.RcvNxt.Add(r.rcvWnd) 337 r.ep.rcvQueueMu.Unlock() 338 return 339 } 340 if r.RcvNxt.LessThan(r.ep.RcvAutoParams.RTTMeasureSeqNumber) { 341 r.ep.rcvQueueMu.Unlock() 342 return 343 } 344 rtt := r.ep.stack.Clock().NowMonotonic().Sub(r.ep.RcvAutoParams.RTTMeasureTime) 345 // We only store the minimum observed RTT here as this is only used in 346 // absence of a SRTT available from either timestamps or a sender 347 // measurement of RTT. 348 if r.ep.RcvAutoParams.RTT == 0 || rtt < r.ep.RcvAutoParams.RTT { 349 r.ep.RcvAutoParams.RTT = rtt 350 } 351 r.ep.RcvAutoParams.RTTMeasureTime = r.ep.stack.Clock().NowMonotonic() 352 r.ep.RcvAutoParams.RTTMeasureSeqNumber = r.RcvNxt.Add(r.rcvWnd) 353 r.ep.rcvQueueMu.Unlock() 354 } 355 356 // +checklocks:r.ep.mu 357 // +checklocksalias:r.ep.snd.ep.mu=r.ep.mu 358 func (r *receiver) handleRcvdSegmentClosing(s *segment, state EndpointState, closed bool) (drop bool, err tcpip.Error) { 359 r.ep.rcvQueueMu.Lock() 360 rcvClosed := r.ep.RcvClosed || r.closed 361 r.ep.rcvQueueMu.Unlock() 362 363 // If we are in one of the shutdown states then we need to do 364 // additional checks before we try and process the segment. 365 switch state { 366 case StateCloseWait, StateClosing, StateLastAck: 367 if !s.sequenceNumber.LessThanEq(r.RcvNxt) { 368 // Just drop the segment as we have 369 // already received a FIN and this 370 // segment is after the sequence number 371 // for the FIN. 372 return true, nil 373 } 374 fallthrough 375 case StateFinWait1, StateFinWait2: 376 // If the ACK acks something not yet sent then we send an ACK. 377 // 378 // RFC793, page 37: If the connection is in a synchronized state, 379 // (ESTABLISHED, FIN-WAIT-1, FIN-WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, 380 // TIME-WAIT), any unacceptable segment (out of window sequence number 381 // or unacceptable acknowledgment number) must elicit only an empty 382 // acknowledgment segment containing the current send-sequence number 383 // and an acknowledgment indicating the next sequence number expected 384 // to be received, and the connection remains in the same state. 385 // 386 // Just as on Linux, we do not apply this behavior when state is 387 // ESTABLISHED. 388 // Linux receive processing for all states except ESTABLISHED and 389 // TIME_WAIT is here where if the ACK check fails, we attempt to 390 // reply back with an ACK with correct seq/ack numbers. 391 // https://github.com/torvalds/linux/blob/v5.8/net/ipv4/tcp_input.c#L6186 392 // The ESTABLISHED state processing is here where if the ACK check 393 // fails, we ignore the packet: 394 // https://github.com/torvalds/linux/blob/v5.8/net/ipv4/tcp_input.c#L5591 395 if r.ep.snd.SndNxt.LessThan(s.ackNumber) { 396 r.ep.snd.maybeSendOutOfWindowAck(s) 397 return true, nil 398 } 399 400 // If we are closed for reads (either due to an 401 // incoming FIN or the user calling shutdown(.., 402 // SHUT_RD) then any data past the RcvNxt should 403 // trigger a RST. 404 endDataSeq := s.sequenceNumber.Add(seqnum.Size(s.payloadSize())) 405 if state != StateCloseWait && rcvClosed && r.RcvNxt.LessThan(endDataSeq) { 406 return true, &tcpip.ErrConnectionAborted{} 407 } 408 if state == StateFinWait1 { 409 break 410 } 411 412 // If it's a retransmission of an old data segment 413 // or a pure ACK then allow it. 414 if s.sequenceNumber.Add(s.logicalLen()).LessThanEq(r.RcvNxt) || 415 s.logicalLen() == 0 { 416 break 417 } 418 419 // In FIN-WAIT2 if the socket is fully 420 // closed(not owned by application on our end 421 // then the only acceptable segment is a 422 // FIN. Since FIN can technically also carry 423 // data we verify that the segment carrying a 424 // FIN ends at exactly e.RcvNxt+1. 425 // 426 // From RFC793 page 25. 427 // 428 // For sequence number purposes, the SYN is 429 // considered to occur before the first actual 430 // data octet of the segment in which it occurs, 431 // while the FIN is considered to occur after 432 // the last actual data octet in a segment in 433 // which it occurs. 434 if closed && (!s.flags.Contains(header.TCPFlagFin) || s.sequenceNumber.Add(s.logicalLen()) != r.RcvNxt+1) { 435 return true, &tcpip.ErrConnectionAborted{} 436 } 437 } 438 439 // We don't care about receive processing anymore if the receive side 440 // is closed. 441 // 442 // NOTE: We still want to permit a FIN as it's possible only our 443 // end has closed and the peer is yet to send a FIN. Hence we 444 // compare only the payload. 445 segEnd := s.sequenceNumber.Add(seqnum.Size(s.payloadSize())) 446 if rcvClosed && !segEnd.LessThanEq(r.RcvNxt) { 447 return true, nil 448 } 449 return false, nil 450 } 451 452 // handleRcvdSegment handles TCP segments directed at the connection managed by 453 // r as they arrive. It is called by the protocol main loop. 454 // +checklocks:r.ep.mu 455 // +checklocksalias:r.ep.snd.ep.mu=r.ep.mu 456 func (r *receiver) handleRcvdSegment(s *segment) (drop bool, err tcpip.Error) { 457 state := r.ep.EndpointState() 458 closed := r.ep.closed 459 460 segLen := seqnum.Size(s.payloadSize()) 461 segSeq := s.sequenceNumber 462 463 // If the sequence number range is outside the acceptable range, just 464 // send an ACK and stop further processing of the segment. 465 // This is according to RFC 793, page 68. 466 if !r.acceptable(segSeq, segLen) { 467 r.ep.snd.maybeSendOutOfWindowAck(s) 468 return true, nil 469 } 470 471 if state != StateEstablished { 472 drop, err := r.handleRcvdSegmentClosing(s, state, closed) 473 if drop || err != nil { 474 return drop, err 475 } 476 } 477 478 // Store the time of the last ack. 479 r.lastRcvdAckTime = r.ep.stack.Clock().NowMonotonic() 480 481 // Defer segment processing if it can't be consumed now. 482 if !r.consumeSegment(s, segSeq, segLen) { 483 if segLen > 0 || s.flags.Contains(header.TCPFlagFin) { 484 // We only store the segment if it's within our buffer 485 // size limit. 486 // 487 // Only use 75% of the receive buffer queue for 488 // out-of-order segments. This ensures that we always 489 // leave some space for the inorder segments to arrive 490 // allowing pending segments to be processed and 491 // delivered to the user. 492 // 493 // The ratio must be at least 50% (the size of rwnd) to 494 // leave space for retransmitted dropped packets. 51% 495 // would make recovery slow when there are multiple 496 // drops by necessitating multiple round trips. 100% 497 // would enable the buffer to be totally full of 498 // out-of-order data and stall the connection. 499 // 500 // An ideal solution is to ensure that there are at 501 // least N bytes free when N bytes are missing, but we 502 // don't have that computed at this point in the stack. 503 if rcvBufSize := r.ep.ops.GetReceiveBufferSize(); rcvBufSize > 0 && (r.PendingBufUsed+int(segLen)) < int(rcvBufSize-rcvBufSize/4) { 504 r.ep.rcvQueueMu.Lock() 505 r.PendingBufUsed += s.segMemSize() 506 r.ep.rcvQueueMu.Unlock() 507 s.IncRef() 508 heap.Push(&r.pendingRcvdSegments, s) 509 UpdateSACKBlocks(&r.ep.sack, segSeq, segSeq.Add(segLen), r.RcvNxt) 510 } 511 512 // Immediately send an ack so that the peer knows it may 513 // have to retransmit. 514 r.ep.snd.sendAck() 515 } 516 return false, nil 517 } 518 519 // Since we consumed a segment update the receiver's RTT estimate 520 // if required. 521 if segLen > 0 { 522 r.updateRTT() 523 } 524 525 // By consuming the current segment, we may have filled a gap in the 526 // sequence number domain that allows pending segments to be consumed 527 // now. So try to do it. 528 for !r.closed && r.pendingRcvdSegments.Len() > 0 { 529 s := r.pendingRcvdSegments[0] 530 segLen := seqnum.Size(s.payloadSize()) 531 segSeq := s.sequenceNumber 532 533 // Skip segment altogether if it has already been acknowledged. 534 if !segSeq.Add(segLen-1).LessThan(r.RcvNxt) && 535 !r.consumeSegment(s, segSeq, segLen) { 536 break 537 } 538 539 heap.Pop(&r.pendingRcvdSegments) 540 r.ep.rcvQueueMu.Lock() 541 r.PendingBufUsed -= s.segMemSize() 542 r.ep.rcvQueueMu.Unlock() 543 s.DecRef() 544 } 545 return false, nil 546 } 547 548 // handleTimeWaitSegment handles inbound segments received when the endpoint 549 // has entered the TIME_WAIT state. 550 // +checklocks:r.ep.mu 551 // +checklocksalias:r.ep.snd.ep.mu=r.ep.mu 552 func (r *receiver) handleTimeWaitSegment(s *segment) (resetTimeWait bool, newSyn bool) { 553 segSeq := s.sequenceNumber 554 segLen := seqnum.Size(s.payloadSize()) 555 556 // Just silently drop any RST packets in TIME_WAIT. We do not support 557 // TIME_WAIT assasination as a result we confirm w/ fix 1 as described 558 // in https://tools.ietf.org/html/rfc1337#section-3. 559 // 560 // This behavior overrides RFC793 page 70 where we transition to CLOSED 561 // on receiving RST, which is also default Linux behavior. 562 // On Linux the RST can be ignored by setting sysctl net.ipv4.tcp_rfc1337. 563 // 564 // As we do not yet support PAWS, we are being conservative in ignoring 565 // RSTs by default. 566 if s.flags.Contains(header.TCPFlagRst) { 567 return false, false 568 } 569 570 // If it's a SYN and the sequence number is higher than any seen before 571 // for this connection then try and redirect it to a listening endpoint 572 // if available. 573 // 574 // RFC 1122: 575 // "When a connection is [...] on TIME-WAIT state [...] 576 // [a TCP] MAY accept a new SYN from the remote TCP to 577 // reopen the connection directly, if it: 578 579 // (1) assigns its initial sequence number for the new 580 // connection to be larger than the largest sequence 581 // number it used on the previous connection incarnation, 582 // and 583 584 // (2) returns to TIME-WAIT state if the SYN turns out 585 // to be an old duplicate". 586 if s.flags.Contains(header.TCPFlagSyn) && r.RcvNxt.LessThan(segSeq) { 587 return false, true 588 } 589 590 // Drop the segment if it does not contain an ACK. 591 if !s.flags.Contains(header.TCPFlagAck) { 592 return false, false 593 } 594 595 // Update Timestamp if required. See RFC7323, section-4.3. 596 if r.ep.SendTSOk && s.parsedOptions.TS { 597 r.ep.updateRecentTimestamp(s.parsedOptions.TSVal, r.ep.snd.MaxSentAck, segSeq) 598 } 599 600 if segSeq.Add(1) == r.RcvNxt && s.flags.Contains(header.TCPFlagFin) { 601 // If it's a FIN-ACK then resetTimeWait and send an ACK, as it 602 // indicates our final ACK could have been lost. 603 r.ep.snd.sendAck() 604 return true, false 605 } 606 607 // If the sequence number range is outside the acceptable range or 608 // carries data then just send an ACK. This is according to RFC 793, 609 // page 37. 610 // 611 // NOTE: In TIME_WAIT the only acceptable sequence number is RcvNxt. 612 if segSeq != r.RcvNxt || segLen != 0 { 613 r.ep.snd.sendAck() 614 } 615 return false, false 616 }