github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/tcpip/transport/tcp/protocol.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package tcp contains the implementation of the TCP transport protocol. 16 package tcp 17 18 import ( 19 "runtime" 20 "strings" 21 "time" 22 23 "github.com/nicocha30/gvisor-ligolo/pkg/sync" 24 "github.com/nicocha30/gvisor-ligolo/pkg/tcpip" 25 "github.com/nicocha30/gvisor-ligolo/pkg/tcpip/hash/jenkins" 26 "github.com/nicocha30/gvisor-ligolo/pkg/tcpip/header" 27 "github.com/nicocha30/gvisor-ligolo/pkg/tcpip/header/parse" 28 "github.com/nicocha30/gvisor-ligolo/pkg/tcpip/internal/tcp" 29 "github.com/nicocha30/gvisor-ligolo/pkg/tcpip/seqnum" 30 "github.com/nicocha30/gvisor-ligolo/pkg/tcpip/stack" 31 "github.com/nicocha30/gvisor-ligolo/pkg/tcpip/transport/raw" 32 "github.com/nicocha30/gvisor-ligolo/pkg/waiter" 33 ) 34 35 const ( 36 // ProtocolNumber is the tcp protocol number. 37 ProtocolNumber = header.TCPProtocolNumber 38 39 // MinBufferSize is the smallest size of a receive or send buffer. 40 MinBufferSize = 4 << 10 // 4096 bytes. 41 42 // DefaultSendBufferSize is the default size of the send buffer for 43 // an endpoint. 44 DefaultSendBufferSize = 1 << 20 // 1MB 45 46 // DefaultReceiveBufferSize is the default size of the receive buffer 47 // for an endpoint. 48 DefaultReceiveBufferSize = 1 << 20 // 1MB 49 50 // MaxBufferSize is the largest size a receive/send buffer can grow to. 51 MaxBufferSize = 4 << 20 // 4MB 52 53 // DefaultTCPLingerTimeout is the amount of time that sockets linger in 54 // FIN_WAIT_2 state before being marked closed. 55 DefaultTCPLingerTimeout = 60 * time.Second 56 57 // MaxTCPLingerTimeout is the maximum amount of time that sockets 58 // linger in FIN_WAIT_2 state before being marked closed. 59 MaxTCPLingerTimeout = 120 * time.Second 60 61 // DefaultTCPTimeWaitTimeout is the amount of time that sockets linger 62 // in TIME_WAIT state before being marked closed. 63 DefaultTCPTimeWaitTimeout = 60 * time.Second 64 65 // DefaultSynRetries is the default value for the number of SYN retransmits 66 // before a connect is aborted. 67 DefaultSynRetries = 6 68 69 // DefaultKeepaliveIdle is the idle time for a connection before keep-alive 70 // probes are sent. 71 DefaultKeepaliveIdle = 2 * time.Hour 72 73 // DefaultKeepaliveInterval is the time between two successive keep-alive 74 // probes. 75 DefaultKeepaliveInterval = 75 * time.Second 76 77 // DefaultKeepaliveCount is the number of keep-alive probes that are sent 78 // before declaring the connection dead. 79 DefaultKeepaliveCount = 9 80 ) 81 82 const ( 83 ccReno = "reno" 84 ccCubic = "cubic" 85 ) 86 87 type protocol struct { 88 stack *stack.Stack 89 90 mu sync.RWMutex 91 sackEnabled bool 92 recovery tcpip.TCPRecovery 93 delayEnabled bool 94 alwaysUseSynCookies bool 95 sendBufferSize tcpip.TCPSendBufferSizeRangeOption 96 recvBufferSize tcpip.TCPReceiveBufferSizeRangeOption 97 congestionControl string 98 availableCongestionControl []string 99 moderateReceiveBuffer bool 100 lingerTimeout time.Duration 101 timeWaitTimeout time.Duration 102 timeWaitReuse tcpip.TCPTimeWaitReuseOption 103 minRTO time.Duration 104 maxRTO time.Duration 105 maxRetries uint32 106 synRetries uint8 107 dispatcher dispatcher 108 109 // The following secrets are initialized once and stay unchanged after. 110 seqnumSecret uint32 111 portOffsetSecret uint32 112 tsOffsetSecret uint32 113 } 114 115 // Number returns the tcp protocol number. 116 func (*protocol) Number() tcpip.TransportProtocolNumber { 117 return ProtocolNumber 118 } 119 120 // NewEndpoint creates a new tcp endpoint. 121 func (p *protocol) NewEndpoint(netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) { 122 return newEndpoint(p.stack, p, netProto, waiterQueue), nil 123 } 124 125 // NewRawEndpoint creates a new raw TCP endpoint. Raw TCP sockets are currently 126 // unsupported. It implements stack.TransportProtocol.NewRawEndpoint. 127 func (p *protocol) NewRawEndpoint(netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) { 128 return raw.NewEndpoint(p.stack, netProto, header.TCPProtocolNumber, waiterQueue) 129 } 130 131 // MinimumPacketSize returns the minimum valid tcp packet size. 132 func (*protocol) MinimumPacketSize() int { 133 return header.TCPMinimumSize 134 } 135 136 // ParsePorts returns the source and destination ports stored in the given tcp 137 // packet. 138 func (*protocol) ParsePorts(v []byte) (src, dst uint16, err tcpip.Error) { 139 h := header.TCP(v) 140 return h.SourcePort(), h.DestinationPort(), nil 141 } 142 143 // QueuePacket queues packets targeted at an endpoint after hashing the packet 144 // to a specific processing queue. Each queue is serviced by its own processor 145 // goroutine which is responsible for dequeuing and doing full TCP dispatch of 146 // the packet. 147 func (p *protocol) QueuePacket(ep stack.TransportEndpoint, id stack.TransportEndpointID, pkt stack.PacketBufferPtr) { 148 p.dispatcher.queuePacket(ep, id, p.stack.Clock(), pkt) 149 } 150 151 // HandleUnknownDestinationPacket handles packets targeted at this protocol but 152 // that don't match any existing endpoint. 153 // 154 // RFC 793, page 36, states that "If the connection does not exist (CLOSED) then 155 // a reset is sent in response to any incoming segment except another reset. In 156 // particular, SYNs addressed to a non-existent connection are rejected by this 157 // means." 158 func (p *protocol) HandleUnknownDestinationPacket(id stack.TransportEndpointID, pkt stack.PacketBufferPtr) stack.UnknownDestinationPacketDisposition { 159 s, err := newIncomingSegment(id, p.stack.Clock(), pkt) 160 if err != nil { 161 return stack.UnknownDestinationPacketMalformed 162 } 163 defer s.DecRef() 164 if !s.csumValid { 165 return stack.UnknownDestinationPacketMalformed 166 } 167 168 if !s.flags.Contains(header.TCPFlagRst) { 169 replyWithReset(p.stack, s, stack.DefaultTOS, tcpip.UseDefaultIPv4TTL, tcpip.UseDefaultIPv6HopLimit) 170 } 171 172 return stack.UnknownDestinationPacketHandled 173 } 174 175 func (p *protocol) tsOffset(src, dst tcpip.Address) tcp.TSOffset { 176 // Initialize a random tsOffset that will be added to the recentTS 177 // everytime the timestamp is sent when the Timestamp option is enabled. 178 // 179 // See https://tools.ietf.org/html/rfc7323#section-5.4 for details on 180 // why this is required. 181 // 182 // TODO(https://gvisor.dev/issues/6473): This is not really secure as 183 // it does not use the recommended algorithm linked above. 184 h := jenkins.Sum32(p.tsOffsetSecret) 185 // Per hash.Hash.Writer: 186 // 187 // It never returns an error. 188 _, _ = h.Write(src.AsSlice()) 189 _, _ = h.Write(dst.AsSlice()) 190 return tcp.NewTSOffset(h.Sum32()) 191 } 192 193 // replyWithReset replies to the given segment with a reset segment. 194 // 195 // If the relevant TTL has its reset value (0 for ipv4TTL, -1 for ipv6HopLimit), 196 // then the route's default TTL will be used. 197 func replyWithReset(st *stack.Stack, s *segment, tos, ipv4TTL uint8, ipv6HopLimit int16) tcpip.Error { 198 net := s.pkt.Network() 199 route, err := st.FindRoute(s.pkt.NICID, net.DestinationAddress(), net.SourceAddress(), s.pkt.NetworkProtocolNumber, false /* multicastLoop */) 200 if err != nil { 201 return err 202 } 203 defer route.Release() 204 205 ttl := calculateTTL(route, ipv4TTL, ipv6HopLimit) 206 207 // Get the seqnum from the packet if the ack flag is set. 208 seq := seqnum.Value(0) 209 ack := seqnum.Value(0) 210 flags := header.TCPFlagRst 211 // As per RFC 793 page 35 (Reset Generation) 212 // 1. If the connection does not exist (CLOSED) then a reset is sent 213 // in response to any incoming segment except another reset. In 214 // particular, SYNs addressed to a non-existent connection are rejected 215 // by this means. 216 217 // If the incoming segment has an ACK field, the reset takes its 218 // sequence number from the ACK field of the segment, otherwise the 219 // reset has sequence number zero and the ACK field is set to the sum 220 // of the sequence number and segment length of the incoming segment. 221 // The connection remains in the CLOSED state. 222 if s.flags.Contains(header.TCPFlagAck) { 223 seq = s.ackNumber 224 } else { 225 flags |= header.TCPFlagAck 226 ack = s.sequenceNumber.Add(s.logicalLen()) 227 } 228 229 p := stack.NewPacketBuffer(stack.PacketBufferOptions{ReserveHeaderBytes: header.TCPMinimumSize + int(route.MaxHeaderLength())}) 230 defer p.DecRef() 231 return sendTCP(route, tcpFields{ 232 id: s.id, 233 ttl: ttl, 234 tos: tos, 235 flags: flags, 236 seq: seq, 237 ack: ack, 238 rcvWnd: 0, 239 }, p, stack.GSO{}, nil /* PacketOwner */) 240 } 241 242 // SetOption implements stack.TransportProtocol.SetOption. 243 func (p *protocol) SetOption(option tcpip.SettableTransportProtocolOption) tcpip.Error { 244 switch v := option.(type) { 245 case *tcpip.TCPSACKEnabled: 246 p.mu.Lock() 247 p.sackEnabled = bool(*v) 248 p.mu.Unlock() 249 return nil 250 251 case *tcpip.TCPRecovery: 252 p.mu.Lock() 253 p.recovery = *v 254 p.mu.Unlock() 255 return nil 256 257 case *tcpip.TCPDelayEnabled: 258 p.mu.Lock() 259 p.delayEnabled = bool(*v) 260 p.mu.Unlock() 261 return nil 262 263 case *tcpip.TCPSendBufferSizeRangeOption: 264 if v.Min <= 0 || v.Default < v.Min || v.Default > v.Max { 265 return &tcpip.ErrInvalidOptionValue{} 266 } 267 p.mu.Lock() 268 p.sendBufferSize = *v 269 p.mu.Unlock() 270 return nil 271 272 case *tcpip.TCPReceiveBufferSizeRangeOption: 273 if v.Min <= 0 || v.Default < v.Min || v.Default > v.Max { 274 return &tcpip.ErrInvalidOptionValue{} 275 } 276 p.mu.Lock() 277 p.recvBufferSize = *v 278 p.mu.Unlock() 279 return nil 280 281 case *tcpip.CongestionControlOption: 282 for _, c := range p.availableCongestionControl { 283 if string(*v) == c { 284 p.mu.Lock() 285 p.congestionControl = string(*v) 286 p.mu.Unlock() 287 return nil 288 } 289 } 290 // linux returns ENOENT when an invalid congestion control 291 // is specified. 292 return &tcpip.ErrNoSuchFile{} 293 294 case *tcpip.TCPModerateReceiveBufferOption: 295 p.mu.Lock() 296 p.moderateReceiveBuffer = bool(*v) 297 p.mu.Unlock() 298 return nil 299 300 case *tcpip.TCPLingerTimeoutOption: 301 p.mu.Lock() 302 if *v < 0 { 303 p.lingerTimeout = 0 304 } else { 305 p.lingerTimeout = time.Duration(*v) 306 } 307 p.mu.Unlock() 308 return nil 309 310 case *tcpip.TCPTimeWaitTimeoutOption: 311 p.mu.Lock() 312 if *v < 0 { 313 p.timeWaitTimeout = 0 314 } else { 315 p.timeWaitTimeout = time.Duration(*v) 316 } 317 p.mu.Unlock() 318 return nil 319 320 case *tcpip.TCPTimeWaitReuseOption: 321 if *v < tcpip.TCPTimeWaitReuseDisabled || *v > tcpip.TCPTimeWaitReuseLoopbackOnly { 322 return &tcpip.ErrInvalidOptionValue{} 323 } 324 p.mu.Lock() 325 p.timeWaitReuse = *v 326 p.mu.Unlock() 327 return nil 328 329 case *tcpip.TCPMinRTOOption: 330 p.mu.Lock() 331 defer p.mu.Unlock() 332 if *v < 0 { 333 p.minRTO = MinRTO 334 } else if minRTO := time.Duration(*v); minRTO <= p.maxRTO { 335 p.minRTO = minRTO 336 } else { 337 return &tcpip.ErrInvalidOptionValue{} 338 } 339 return nil 340 341 case *tcpip.TCPMaxRTOOption: 342 p.mu.Lock() 343 defer p.mu.Unlock() 344 if *v < 0 { 345 p.maxRTO = MaxRTO 346 } else if maxRTO := time.Duration(*v); maxRTO >= p.minRTO { 347 p.maxRTO = maxRTO 348 } else { 349 return &tcpip.ErrInvalidOptionValue{} 350 } 351 return nil 352 353 case *tcpip.TCPMaxRetriesOption: 354 p.mu.Lock() 355 p.maxRetries = uint32(*v) 356 p.mu.Unlock() 357 return nil 358 359 case *tcpip.TCPAlwaysUseSynCookies: 360 p.mu.Lock() 361 p.alwaysUseSynCookies = bool(*v) 362 p.mu.Unlock() 363 return nil 364 365 case *tcpip.TCPSynRetriesOption: 366 if *v < 1 || *v > 255 { 367 return &tcpip.ErrInvalidOptionValue{} 368 } 369 p.mu.Lock() 370 p.synRetries = uint8(*v) 371 p.mu.Unlock() 372 return nil 373 374 default: 375 return &tcpip.ErrUnknownProtocolOption{} 376 } 377 } 378 379 // Option implements stack.TransportProtocol.Option. 380 func (p *protocol) Option(option tcpip.GettableTransportProtocolOption) tcpip.Error { 381 switch v := option.(type) { 382 case *tcpip.TCPSACKEnabled: 383 p.mu.RLock() 384 *v = tcpip.TCPSACKEnabled(p.sackEnabled) 385 p.mu.RUnlock() 386 return nil 387 388 case *tcpip.TCPRecovery: 389 p.mu.RLock() 390 *v = p.recovery 391 p.mu.RUnlock() 392 return nil 393 394 case *tcpip.TCPDelayEnabled: 395 p.mu.RLock() 396 *v = tcpip.TCPDelayEnabled(p.delayEnabled) 397 p.mu.RUnlock() 398 return nil 399 400 case *tcpip.TCPSendBufferSizeRangeOption: 401 p.mu.RLock() 402 *v = p.sendBufferSize 403 p.mu.RUnlock() 404 return nil 405 406 case *tcpip.TCPReceiveBufferSizeRangeOption: 407 p.mu.RLock() 408 *v = p.recvBufferSize 409 p.mu.RUnlock() 410 return nil 411 412 case *tcpip.CongestionControlOption: 413 p.mu.RLock() 414 *v = tcpip.CongestionControlOption(p.congestionControl) 415 p.mu.RUnlock() 416 return nil 417 418 case *tcpip.TCPAvailableCongestionControlOption: 419 p.mu.RLock() 420 *v = tcpip.TCPAvailableCongestionControlOption(strings.Join(p.availableCongestionControl, " ")) 421 p.mu.RUnlock() 422 return nil 423 424 case *tcpip.TCPModerateReceiveBufferOption: 425 p.mu.RLock() 426 *v = tcpip.TCPModerateReceiveBufferOption(p.moderateReceiveBuffer) 427 p.mu.RUnlock() 428 return nil 429 430 case *tcpip.TCPLingerTimeoutOption: 431 p.mu.RLock() 432 *v = tcpip.TCPLingerTimeoutOption(p.lingerTimeout) 433 p.mu.RUnlock() 434 return nil 435 436 case *tcpip.TCPTimeWaitTimeoutOption: 437 p.mu.RLock() 438 *v = tcpip.TCPTimeWaitTimeoutOption(p.timeWaitTimeout) 439 p.mu.RUnlock() 440 return nil 441 442 case *tcpip.TCPTimeWaitReuseOption: 443 p.mu.RLock() 444 *v = p.timeWaitReuse 445 p.mu.RUnlock() 446 return nil 447 448 case *tcpip.TCPMinRTOOption: 449 p.mu.RLock() 450 *v = tcpip.TCPMinRTOOption(p.minRTO) 451 p.mu.RUnlock() 452 return nil 453 454 case *tcpip.TCPMaxRTOOption: 455 p.mu.RLock() 456 *v = tcpip.TCPMaxRTOOption(p.maxRTO) 457 p.mu.RUnlock() 458 return nil 459 460 case *tcpip.TCPMaxRetriesOption: 461 p.mu.RLock() 462 *v = tcpip.TCPMaxRetriesOption(p.maxRetries) 463 p.mu.RUnlock() 464 return nil 465 466 case *tcpip.TCPAlwaysUseSynCookies: 467 p.mu.RLock() 468 *v = tcpip.TCPAlwaysUseSynCookies(p.alwaysUseSynCookies) 469 p.mu.RUnlock() 470 return nil 471 472 case *tcpip.TCPSynRetriesOption: 473 p.mu.RLock() 474 *v = tcpip.TCPSynRetriesOption(p.synRetries) 475 p.mu.RUnlock() 476 return nil 477 478 default: 479 return &tcpip.ErrUnknownProtocolOption{} 480 } 481 } 482 483 // Close implements stack.TransportProtocol.Close. 484 func (p *protocol) Close() { 485 p.dispatcher.close() 486 } 487 488 // Wait implements stack.TransportProtocol.Wait. 489 func (p *protocol) Wait() { 490 p.dispatcher.wait() 491 } 492 493 // Pause implements stack.TransportProtocol.Pause. 494 func (p *protocol) Pause() { 495 p.dispatcher.pause() 496 } 497 498 // Resume implements stack.TransportProtocol.Resume. 499 func (p *protocol) Resume() { 500 p.dispatcher.resume() 501 } 502 503 // Parse implements stack.TransportProtocol.Parse. 504 func (*protocol) Parse(pkt stack.PacketBufferPtr) bool { 505 return parse.TCP(pkt) 506 } 507 508 // NewProtocol returns a TCP transport protocol. 509 func NewProtocol(s *stack.Stack) stack.TransportProtocol { 510 p := protocol{ 511 stack: s, 512 sendBufferSize: tcpip.TCPSendBufferSizeRangeOption{ 513 Min: MinBufferSize, 514 Default: DefaultSendBufferSize, 515 Max: MaxBufferSize, 516 }, 517 recvBufferSize: tcpip.TCPReceiveBufferSizeRangeOption{ 518 Min: MinBufferSize, 519 Default: DefaultReceiveBufferSize, 520 Max: MaxBufferSize, 521 }, 522 congestionControl: ccReno, 523 availableCongestionControl: []string{ccReno, ccCubic}, 524 moderateReceiveBuffer: true, 525 lingerTimeout: DefaultTCPLingerTimeout, 526 timeWaitTimeout: DefaultTCPTimeWaitTimeout, 527 timeWaitReuse: tcpip.TCPTimeWaitReuseLoopbackOnly, 528 synRetries: DefaultSynRetries, 529 minRTO: MinRTO, 530 maxRTO: MaxRTO, 531 maxRetries: MaxRetries, 532 recovery: tcpip.TCPRACKLossDetection, 533 seqnumSecret: s.Rand().Uint32(), 534 portOffsetSecret: s.Rand().Uint32(), 535 tsOffsetSecret: s.Rand().Uint32(), 536 } 537 p.dispatcher.init(s.Rand(), runtime.GOMAXPROCS(0)) 538 return &p 539 } 540 541 // protocolFromStack retrieves the tcp.protocol instance from stack s. 542 func protocolFromStack(s *stack.Stack) *protocol { 543 return s.TransportProtocolInstance(ProtocolNumber).(*protocol) 544 }