github.com/flowerwrong/netstack@v0.0.0-20191009141956-e5848263af28/tcpip/transport/tcp/rcv.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package tcp 16 17 import ( 18 "container/heap" 19 "time" 20 21 "github.com/FlowerWrong/netstack/tcpip/header" 22 "github.com/FlowerWrong/netstack/tcpip/seqnum" 23 ) 24 25 // receiver holds the state necessary to receive TCP segments and turn them 26 // into a stream of bytes. 27 // 28 // +stateify savable 29 type receiver struct { 30 ep *endpoint 31 32 rcvNxt seqnum.Value 33 34 // rcvAcc is one beyond the last acceptable sequence number. That is, 35 // the "largest" sequence value that the receiver has announced to the 36 // its peer that it's willing to accept. This may be different than 37 // rcvNxt + rcvWnd if the receive window is reduced; in that case we 38 // have to reduce the window as we receive more data instead of 39 // shrinking it. 40 rcvAcc seqnum.Value 41 42 // rcvWnd is the non-scaled receive window last advertised to the peer. 43 rcvWnd seqnum.Size 44 45 rcvWndScale uint8 46 47 closed bool 48 49 pendingRcvdSegments segmentHeap 50 pendingBufUsed seqnum.Size 51 pendingBufSize seqnum.Size 52 } 53 54 func newReceiver(ep *endpoint, irs seqnum.Value, rcvWnd seqnum.Size, rcvWndScale uint8, pendingBufSize seqnum.Size) *receiver { 55 return &receiver{ 56 ep: ep, 57 rcvNxt: irs + 1, 58 rcvAcc: irs.Add(rcvWnd + 1), 59 rcvWnd: rcvWnd, 60 rcvWndScale: rcvWndScale, 61 pendingBufSize: pendingBufSize, 62 } 63 } 64 65 // acceptable checks if the segment sequence number range is acceptable 66 // according to the table on page 26 of RFC 793. 67 func (r *receiver) acceptable(segSeq seqnum.Value, segLen seqnum.Size) bool { 68 rcvWnd := r.rcvNxt.Size(r.rcvAcc) 69 if rcvWnd == 0 { 70 return segLen == 0 && segSeq == r.rcvNxt 71 } 72 73 return segSeq.InWindow(r.rcvNxt, rcvWnd) || 74 seqnum.Overlap(r.rcvNxt, rcvWnd, segSeq, segLen) 75 } 76 77 // getSendParams returns the parameters needed by the sender when building 78 // segments to send. 79 func (r *receiver) getSendParams() (rcvNxt seqnum.Value, rcvWnd seqnum.Size) { 80 // Calculate the window size based on the available buffer space. 81 receiveBufferAvailable := r.ep.receiveBufferAvailable() 82 acc := r.rcvNxt.Add(seqnum.Size(receiveBufferAvailable)) 83 if r.rcvAcc.LessThan(acc) { 84 r.rcvAcc = acc 85 } 86 // Stash away the non-scaled receive window as we use it for measuring 87 // receiver's estimated RTT. 88 r.rcvWnd = r.rcvNxt.Size(r.rcvAcc) 89 return r.rcvNxt, r.rcvWnd >> r.rcvWndScale 90 } 91 92 // nonZeroWindow is called when the receive window grows from zero to nonzero; 93 // in such cases we may need to send an ack to indicate to our peer that it can 94 // resume sending data. 95 func (r *receiver) nonZeroWindow() { 96 if (r.rcvAcc-r.rcvNxt)>>r.rcvWndScale != 0 { 97 // We never got around to announcing a zero window size, so we 98 // don't need to immediately announce a nonzero one. 99 return 100 } 101 102 // Immediately send an ack. 103 r.ep.snd.sendAck() 104 } 105 106 // consumeSegment attempts to consume a segment that was received by r. The 107 // segment may have just been received or may have been received earlier but 108 // wasn't ready to be consumed then. 109 // 110 // Returns true if the segment was consumed, false if it cannot be consumed 111 // yet because of a missing segment. 112 func (r *receiver) consumeSegment(s *segment, segSeq seqnum.Value, segLen seqnum.Size) bool { 113 if segLen > 0 { 114 // If the segment doesn't include the seqnum we're expecting to 115 // consume now, we're missing a segment. We cannot proceed until 116 // we receive that segment though. 117 if !r.rcvNxt.InWindow(segSeq, segLen) { 118 return false 119 } 120 121 // Trim segment to eliminate already acknowledged data. 122 if segSeq.LessThan(r.rcvNxt) { 123 diff := segSeq.Size(r.rcvNxt) 124 segLen -= diff 125 segSeq.UpdateForward(diff) 126 s.sequenceNumber.UpdateForward(diff) 127 s.data.TrimFront(int(diff)) 128 } 129 130 // Move segment to ready-to-deliver list. Wakeup any waiters. 131 r.ep.readyToRead(s) 132 133 } else if segSeq != r.rcvNxt { 134 return false 135 } 136 137 // Update the segment that we're expecting to consume. 138 r.rcvNxt = segSeq.Add(segLen) 139 140 // In cases of a misbehaving sender which could send more than the 141 // advertised window, we could end up in a situation where we get a 142 // segment that exceeds the window advertised. Instead of partially 143 // accepting the segment and discarding bytes beyond the advertised 144 // window, we accept the whole segment and make sure r.rcvAcc is moved 145 // forward to match r.rcvNxt to indicate that the window is now closed. 146 // 147 // In absence of this check the r.acceptable() check fails and accepts 148 // segments that should be dropped because rcvWnd is calculated as 149 // the size of the interval (rcvNxt, rcvAcc] which becomes extremely 150 // large if rcvAcc is ever less than rcvNxt. 151 if r.rcvAcc.LessThan(r.rcvNxt) { 152 r.rcvAcc = r.rcvNxt 153 } 154 155 // Trim SACK Blocks to remove any SACK information that covers 156 // sequence numbers that have been consumed. 157 TrimSACKBlockList(&r.ep.sack, r.rcvNxt) 158 159 // Handle FIN or FIN-ACK. 160 if s.flagIsSet(header.TCPFlagFin) { 161 r.rcvNxt++ 162 163 // Send ACK immediately. 164 r.ep.snd.sendAck() 165 166 // Tell any readers that no more data will come. 167 r.closed = true 168 r.ep.readyToRead(nil) 169 170 // We just received a FIN, our next state depends on whether we sent a 171 // FIN already or not. 172 r.ep.mu.Lock() 173 switch r.ep.state { 174 case StateEstablished: 175 r.ep.state = StateCloseWait 176 case StateFinWait1: 177 if s.flagIsSet(header.TCPFlagAck) { 178 // FIN-ACK, transition to TIME-WAIT. 179 r.ep.state = StateTimeWait 180 } else { 181 // Simultaneous close, expecting a final ACK. 182 r.ep.state = StateClosing 183 } 184 case StateFinWait2: 185 r.ep.state = StateTimeWait 186 } 187 r.ep.mu.Unlock() 188 189 // Flush out any pending segments, except the very first one if 190 // it happens to be the one we're handling now because the 191 // caller is using it. 192 first := 0 193 if len(r.pendingRcvdSegments) != 0 && r.pendingRcvdSegments[0] == s { 194 first = 1 195 } 196 197 for i := first; i < len(r.pendingRcvdSegments); i++ { 198 r.pendingRcvdSegments[i].decRef() 199 } 200 r.pendingRcvdSegments = r.pendingRcvdSegments[:first] 201 202 return true 203 } 204 205 // Handle ACK (not FIN-ACK, which we handled above) during one of the 206 // shutdown states. 207 if s.flagIsSet(header.TCPFlagAck) { 208 r.ep.mu.Lock() 209 switch r.ep.state { 210 case StateFinWait1: 211 r.ep.state = StateFinWait2 212 case StateClosing: 213 r.ep.state = StateTimeWait 214 case StateLastAck: 215 r.ep.state = StateClose 216 } 217 r.ep.mu.Unlock() 218 } 219 220 return true 221 } 222 223 // updateRTT updates the receiver RTT measurement based on the sequence number 224 // of the received segment. 225 func (r *receiver) updateRTT() { 226 // From: https://public.lanl.gov/radiant/pubs/drs/sc2001-poster.pdf 227 // 228 // A system that is only transmitting acknowledgements can still 229 // estimate the round-trip time by observing the time between when a byte 230 // is first acknowledged and the receipt of data that is at least one 231 // window beyond the sequence number that was acknowledged. 232 r.ep.rcvListMu.Lock() 233 if r.ep.rcvAutoParams.rttMeasureTime.IsZero() { 234 // New measurement. 235 r.ep.rcvAutoParams.rttMeasureTime = time.Now() 236 r.ep.rcvAutoParams.rttMeasureSeqNumber = r.rcvNxt.Add(r.rcvWnd) 237 r.ep.rcvListMu.Unlock() 238 return 239 } 240 if r.rcvNxt.LessThan(r.ep.rcvAutoParams.rttMeasureSeqNumber) { 241 r.ep.rcvListMu.Unlock() 242 return 243 } 244 rtt := time.Since(r.ep.rcvAutoParams.rttMeasureTime) 245 // We only store the minimum observed RTT here as this is only used in 246 // absence of a SRTT available from either timestamps or a sender 247 // measurement of RTT. 248 if r.ep.rcvAutoParams.rtt == 0 || rtt < r.ep.rcvAutoParams.rtt { 249 r.ep.rcvAutoParams.rtt = rtt 250 } 251 r.ep.rcvAutoParams.rttMeasureTime = time.Now() 252 r.ep.rcvAutoParams.rttMeasureSeqNumber = r.rcvNxt.Add(r.rcvWnd) 253 r.ep.rcvListMu.Unlock() 254 } 255 256 // handleRcvdSegment handles TCP segments directed at the connection managed by 257 // r as they arrive. It is called by the protocol main loop. 258 func (r *receiver) handleRcvdSegment(s *segment) { 259 // We don't care about receive processing anymore if the receive side 260 // is closed. 261 if r.closed { 262 return 263 } 264 265 segLen := seqnum.Size(s.data.Size()) 266 segSeq := s.sequenceNumber 267 268 // If the sequence number range is outside the acceptable range, just 269 // send an ACK. This is according to RFC 793, page 37. 270 if !r.acceptable(segSeq, segLen) { 271 r.ep.snd.sendAck() 272 return 273 } 274 275 // Defer segment processing if it can't be consumed now. 276 if !r.consumeSegment(s, segSeq, segLen) { 277 if segLen > 0 || s.flagIsSet(header.TCPFlagFin) { 278 // We only store the segment if it's within our buffer 279 // size limit. 280 if r.pendingBufUsed < r.pendingBufSize { 281 r.pendingBufUsed += s.logicalLen() 282 s.incRef() 283 heap.Push(&r.pendingRcvdSegments, s) 284 UpdateSACKBlocks(&r.ep.sack, segSeq, segSeq.Add(segLen), r.rcvNxt) 285 } 286 287 // Immediately send an ack so that the peer knows it may 288 // have to retransmit. 289 r.ep.snd.sendAck() 290 } 291 return 292 } 293 294 // Since we consumed a segment update the receiver's RTT estimate 295 // if required. 296 if segLen > 0 { 297 r.updateRTT() 298 } 299 300 // By consuming the current segment, we may have filled a gap in the 301 // sequence number domain that allows pending segments to be consumed 302 // now. So try to do it. 303 for !r.closed && r.pendingRcvdSegments.Len() > 0 { 304 s := r.pendingRcvdSegments[0] 305 segLen := seqnum.Size(s.data.Size()) 306 segSeq := s.sequenceNumber 307 308 // Skip segment altogether if it has already been acknowledged. 309 if !segSeq.Add(segLen-1).LessThan(r.rcvNxt) && 310 !r.consumeSegment(s, segSeq, segLen) { 311 break 312 } 313 314 heap.Pop(&r.pendingRcvdSegments) 315 r.pendingBufUsed -= s.logicalLen() 316 s.decRef() 317 } 318 }