github.com/sagernet/gvisor@v0.0.0-20240428053021-e691de28565f/pkg/tcpip/transport/tcpconntrack/tcp_conntrack.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package tcpconntrack implements a TCP connection tracking object. It allows 16 // users with access to a segment stream to figure out when a connection is 17 // established, reset, and closed (and in the last case, who closed first). 18 package tcpconntrack 19 20 import ( 21 "github.com/sagernet/gvisor/pkg/tcpip/header" 22 "github.com/sagernet/gvisor/pkg/tcpip/seqnum" 23 ) 24 25 // Result is returned when the state of a TCB is updated in response to a 26 // segment. 27 type Result int 28 29 const ( 30 // ResultDrop indicates that the segment should be dropped. 31 ResultDrop Result = iota 32 33 // ResultConnecting indicates that the connection remains in a 34 // connecting state. 35 ResultConnecting 36 37 // ResultAlive indicates that the connection remains alive (connected). 38 ResultAlive 39 40 // ResultReset indicates that the connection was reset. 41 ResultReset 42 43 // ResultClosedByResponder indicates that the connection was gracefully 44 // closed, and the reply stream was closed first. 45 ResultClosedByResponder 46 47 // ResultClosedByOriginator indicates that the connection was gracefully 48 // closed, and the original stream was closed first. 49 ResultClosedByOriginator 50 ) 51 52 // maxWindowShift is the maximum shift value of the per the windows scale 53 // option defined by RFC 1323. 54 const maxWindowShift = 14 55 56 // TCB is a TCP Control Block. It holds state necessary to keep track of a TCP 57 // connection and inform the caller when the connection has been closed. 58 type TCB struct { 59 reply stream 60 original stream 61 62 // State handlers. hdr is not guaranteed to contain bytes beyond the TCP 63 // header itself, i.e. it may not contain the payload. 64 handlerReply func(tcb *TCB, hdr header.TCP, dataLen int) Result 65 handlerOriginal func(tcb *TCB, hdr header.TCP, dataLen int) Result 66 67 // firstFin holds a pointer to the first stream to send a FIN. 68 firstFin *stream 69 70 // state is the current state of the stream. 71 state Result 72 } 73 74 // Init initializes the state of the TCB according to the initial SYN. 75 func (t *TCB) Init(initialSyn header.TCP, dataLen int) Result { 76 t.handlerReply = synSentStateReply 77 t.handlerOriginal = synSentStateOriginal 78 79 iss := seqnum.Value(initialSyn.SequenceNumber()) 80 t.original.una = iss 81 t.original.nxt = iss.Add(logicalLenSyn(initialSyn, dataLen)) 82 t.original.end = t.original.nxt 83 // TODO(gvisor.dev/issue/6734): Cache TCP options instead of re-parsing them. 84 // Because original and reply are streams, scale applies to the reply; it is 85 // the receive window in the reply direction. 86 t.reply.shiftCnt = header.ParseSynOptions(initialSyn.Options(), false /* isAck */).WS 87 88 // Even though "end" is a sequence number, we don't know the initial 89 // receive sequence number yet, so we store the window size until we get 90 // a SYN from the server. 91 t.reply.una = 0 92 t.reply.nxt = 0 93 t.reply.end = seqnum.Value(initialSyn.WindowSize()) 94 t.state = ResultConnecting 95 return t.state 96 } 97 98 // UpdateStateReply updates the state of the TCB based on the supplied reply 99 // segment. 100 func (t *TCB) UpdateStateReply(tcp header.TCP, dataLen int) Result { 101 st := t.handlerReply(t, tcp, dataLen) 102 if st != ResultDrop { 103 t.state = st 104 } 105 return st 106 } 107 108 // UpdateStateOriginal updates the state of the TCB based on the supplied 109 // original segment. 110 func (t *TCB) UpdateStateOriginal(tcp header.TCP, dataLen int) Result { 111 st := t.handlerOriginal(t, tcp, dataLen) 112 if st != ResultDrop { 113 t.state = st 114 } 115 return st 116 } 117 118 // State returns the current state of the TCB. 119 func (t *TCB) State() Result { 120 return t.state 121 } 122 123 // IsAlive returns true as long as the connection is established(Alive) 124 // or connecting state. 125 func (t *TCB) IsAlive() bool { 126 return !t.reply.rstSeen && !t.original.rstSeen && (!t.reply.closed() || !t.original.closed()) 127 } 128 129 // OriginalSendSequenceNumber returns the snd.NXT for the original stream. 130 func (t *TCB) OriginalSendSequenceNumber() seqnum.Value { 131 return t.original.nxt 132 } 133 134 // ReplySendSequenceNumber returns the snd.NXT for the reply stream. 135 func (t *TCB) ReplySendSequenceNumber() seqnum.Value { 136 return t.reply.nxt 137 } 138 139 // adapResult modifies the supplied "Result" according to the state of the TCB; 140 // if r is anything other than "Alive", or if one of the streams isn't closed 141 // yet, it is returned unmodified. Otherwise it's converted to either 142 // ClosedByOriginator or ClosedByResponder depending on which stream was closed 143 // first. 144 func (t *TCB) adaptResult(r Result) Result { 145 // Check the unmodified case. 146 if r != ResultAlive || !t.reply.closed() || !t.original.closed() { 147 return r 148 } 149 150 // Find out which was closed first. 151 if t.firstFin == &t.original { 152 return ResultClosedByOriginator 153 } 154 155 return ResultClosedByResponder 156 } 157 158 // synSentStateReply is the state handler for reply segments when the 159 // connection is in SYN-SENT state. 160 func synSentStateReply(t *TCB, tcp header.TCP, dataLen int) Result { 161 flags := tcp.Flags() 162 ackPresent := flags&header.TCPFlagAck != 0 163 ack := seqnum.Value(tcp.AckNumber()) 164 165 // Ignore segment if ack is present but not acceptable. 166 if ackPresent && !(ack-1).InRange(t.original.una, t.original.nxt) { 167 return ResultConnecting 168 } 169 170 // If reset is specified, we will let the packet through no matter what 171 // but we will also destroy the connection if the ACK is present (and 172 // implicitly acceptable). 173 if flags&header.TCPFlagRst != 0 { 174 if ackPresent { 175 t.reply.rstSeen = true 176 return ResultReset 177 } 178 return ResultConnecting 179 } 180 181 // Ignore segment if SYN is not set. 182 if flags&header.TCPFlagSyn == 0 { 183 return ResultConnecting 184 } 185 186 // TODO(gvisor.dev/issue/6734): Cache TCP options instead of re-parsing them. 187 // Because original and reply are streams, scale applies to the reply; it is 188 // the receive window in the original direction. 189 t.original.shiftCnt = header.ParseSynOptions(tcp.Options(), ackPresent).WS 190 191 // Window scaling works only when both ends use the scale option. 192 if t.original.shiftCnt != -1 && t.reply.shiftCnt != -1 { 193 // Per RFC 1323 section 2.3: 194 // 195 // "If a Window Scale option is received with a shift.cnt value exceeding 196 // 14, the TCP should log the error but use 14 instead of the specified 197 // value." 198 if t.original.shiftCnt > maxWindowShift { 199 t.original.shiftCnt = maxWindowShift 200 } 201 if t.reply.shiftCnt > maxWindowShift { 202 t.original.shiftCnt = maxWindowShift 203 } 204 } else { 205 t.original.shiftCnt = 0 206 t.reply.shiftCnt = 0 207 } 208 // Update state informed by this SYN. 209 irs := seqnum.Value(tcp.SequenceNumber()) 210 t.reply.una = irs 211 t.reply.nxt = irs.Add(logicalLen(tcp, dataLen, seqnum.Size(t.reply.end) /* end currently holds the receive window size */)) 212 t.reply.end <<= t.reply.shiftCnt 213 t.reply.end.UpdateForward(seqnum.Size(irs)) 214 215 windowSize := t.original.windowSize(tcp) 216 t.original.end = t.original.una.Add(windowSize) 217 218 // If the ACK was set (it is acceptable), update our unacknowledgement 219 // tracking. 220 if ackPresent { 221 // Advance the "una" and "end" indices of the original stream. 222 if t.original.una.LessThan(ack) { 223 t.original.una = ack 224 } 225 226 if end := ack.Add(seqnum.Size(windowSize)); t.original.end.LessThan(end) { 227 t.original.end = end 228 } 229 } 230 231 // Update handlers so that new calls will be handled by new state. 232 t.handlerReply = allOtherReply 233 t.handlerOriginal = allOtherOriginal 234 235 return ResultAlive 236 } 237 238 // synSentStateOriginal is the state handler for original segments when the 239 // connection is in SYN-SENT state. 240 func synSentStateOriginal(t *TCB, tcp header.TCP, _ int) Result { 241 // Drop original segments that aren't retransmits of the original one. 242 if tcp.Flags() != header.TCPFlagSyn || tcp.SequenceNumber() != uint32(t.original.una) { 243 return ResultDrop 244 } 245 246 // Update the receive window. We only remember the largest value seen. 247 if wnd := seqnum.Value(tcp.WindowSize()); wnd > t.reply.end { 248 t.reply.end = wnd 249 } 250 251 return ResultConnecting 252 } 253 254 // update updates the state of reply and original streams, given the supplied 255 // reply segment. For original segments, this same function can be called with 256 // swapped reply/original streams. 257 func update(tcp header.TCP, reply, original *stream, firstFin **stream, dataLen int) Result { 258 // Ignore segments out of the window. 259 s := seqnum.Value(tcp.SequenceNumber()) 260 if !reply.acceptable(s, seqnum.Size(dataLen)) { 261 return ResultAlive 262 } 263 264 flags := tcp.Flags() 265 if flags&header.TCPFlagRst != 0 { 266 reply.rstSeen = true 267 return ResultReset 268 } 269 270 // Ignore segments that don't have the ACK flag, and those with the SYN 271 // flag. 272 if flags&header.TCPFlagAck == 0 || flags&header.TCPFlagSyn != 0 { 273 return ResultAlive 274 } 275 276 // Ignore segments that acknowledge not yet sent data. 277 ack := seqnum.Value(tcp.AckNumber()) 278 if original.nxt.LessThan(ack) { 279 return ResultAlive 280 } 281 282 // Advance the "una" and "end" indices of the original stream. 283 if original.una.LessThan(ack) { 284 original.una = ack 285 } 286 287 if end := ack.Add(original.windowSize(tcp)); original.end.LessThan(end) { 288 original.end = end 289 } 290 291 // Advance the "nxt" index of the reply stream. 292 end := s.Add(logicalLen(tcp, dataLen, reply.rwndSize())) 293 if reply.nxt.LessThan(end) { 294 reply.nxt = end 295 } 296 297 // Note the index of the FIN segment. And stash away a pointer to the 298 // first stream to see a FIN. 299 if flags&header.TCPFlagFin != 0 && !reply.finSeen { 300 reply.finSeen = true 301 reply.fin = end - 1 302 303 if *firstFin == nil { 304 *firstFin = reply 305 } 306 } 307 308 return ResultAlive 309 } 310 311 // allOtherReply is the state handler for reply segments in all states 312 // except SYN-SENT. 313 func allOtherReply(t *TCB, tcp header.TCP, dataLen int) Result { 314 return t.adaptResult(update(tcp, &t.reply, &t.original, &t.firstFin, dataLen)) 315 } 316 317 // allOtherOriginal is the state handler for original segments in all states 318 // except SYN-SENT. 319 func allOtherOriginal(t *TCB, tcp header.TCP, dataLen int) Result { 320 return t.adaptResult(update(tcp, &t.original, &t.reply, &t.firstFin, dataLen)) 321 } 322 323 // streams holds the state of a TCP unidirectional stream. 324 type stream struct { 325 // The interval [una, end) is the allowed interval as defined by the 326 // receiver, i.e., anything less than una has already been acknowledged 327 // and anything greater than or equal to end is beyond the receiver 328 // window. The interval [una, nxt) is the acknowledgable range, whose 329 // right edge indicates the sequence number of the next byte to be sent 330 // by the sender, i.e., anything greater than or equal to nxt hasn't 331 // been sent yet. 332 una seqnum.Value 333 nxt seqnum.Value 334 end seqnum.Value 335 336 // finSeen indicates if a FIN has already been sent on this stream. 337 finSeen bool 338 339 // fin is the sequence number of the FIN. It is only valid after finSeen 340 // is set to true. 341 fin seqnum.Value 342 343 // rstSeen indicates if a RST has already been sent on this stream. 344 rstSeen bool 345 346 // shiftCnt is the shift of the window scale of the receiver of the stream, 347 // i.e. in a stream from A to B it is B's receive window scale. It cannot be 348 // greater than maxWindowScale. 349 shiftCnt int 350 } 351 352 // acceptable determines if the segment with the given sequence number and data 353 // length is acceptable, i.e., if it's within the [una, end) window or, in case 354 // the window is zero, if it's a packet with no payload and sequence number 355 // equal to una. 356 func (s *stream) acceptable(segSeq seqnum.Value, segLen seqnum.Size) bool { 357 return header.Acceptable(segSeq, segLen, s.una, s.end) 358 } 359 360 // closed determines if the stream has already been closed. This happens when 361 // a FIN has been set by the sender and acknowledged by the receiver. 362 func (s *stream) closed() bool { 363 return s.finSeen && s.fin.LessThan(s.una) 364 } 365 366 // rwndSize returns the stream's receive window size. 367 func (s *stream) rwndSize() seqnum.Size { 368 return s.una.Size(s.end) 369 } 370 371 // windowSize returns the stream's window size accounting for scale. 372 func (s *stream) windowSize(tcp header.TCP) seqnum.Size { 373 return seqnum.Size(tcp.WindowSize()) << s.shiftCnt 374 } 375 376 // logicalLenSyn calculates the logical length of a SYN (without ACK) segment. 377 // It is similar to logicalLen, but does not impose a window size requirement 378 // because of the SYN. 379 func logicalLenSyn(tcp header.TCP, dataLen int) seqnum.Size { 380 length := seqnum.Size(dataLen) 381 flags := tcp.Flags() 382 if flags&header.TCPFlagSyn != 0 { 383 length++ 384 } 385 if flags&header.TCPFlagFin != 0 { 386 length++ 387 } 388 return length 389 } 390 391 // logicalLen calculates the logical length of the TCP segment. 392 func logicalLen(tcp header.TCP, dataLen int, windowSize seqnum.Size) seqnum.Size { 393 // If the segment is too large, TCP trims the payload per RFC 793 page 70. 394 length := logicalLenSyn(tcp, dataLen) 395 if length > windowSize { 396 length = windowSize 397 } 398 return length 399 } 400 401 // IsEmpty returns true if tcb is not initialized. 402 func (t *TCB) IsEmpty() bool { 403 if t.reply != (stream{}) || t.original != (stream{}) { 404 return false 405 } 406 407 if t.firstFin != nil || t.state != ResultDrop { 408 return false 409 } 410 411 return true 412 }