github.com/polevpn/netstack@v1.10.9/tcpip/header/tcp.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package header 16 17 import ( 18 "encoding/binary" 19 20 "github.com/google/btree" 21 "github.com/polevpn/netstack/tcpip" 22 "github.com/polevpn/netstack/tcpip/seqnum" 23 ) 24 25 // These constants are the offsets of the respective fields in the TCP header. 26 const ( 27 TCPSrcPortOffset = 0 28 TCPDstPortOffset = 2 29 TCPSeqNumOffset = 4 30 TCPAckNumOffset = 8 31 TCPDataOffset = 12 32 TCPFlagsOffset = 13 33 TCPWinSizeOffset = 14 34 TCPChecksumOffset = 16 35 TCPUrgentPtrOffset = 18 36 ) 37 38 const ( 39 // MaxWndScale is maximum allowed window scaling, as described in 40 // RFC 1323, section 2.3, page 11. 41 MaxWndScale = 14 42 43 // TCPMaxSACKBlocks is the maximum number of SACK blocks that can 44 // be encoded in a TCP option field. 45 TCPMaxSACKBlocks = 4 46 ) 47 48 // Flags that may be set in a TCP segment. 49 const ( 50 TCPFlagFin = 1 << iota 51 TCPFlagSyn 52 TCPFlagRst 53 TCPFlagPsh 54 TCPFlagAck 55 TCPFlagUrg 56 ) 57 58 // Options that may be present in a TCP segment. 59 const ( 60 TCPOptionEOL = 0 61 TCPOptionNOP = 1 62 TCPOptionMSS = 2 63 TCPOptionWS = 3 64 TCPOptionTS = 8 65 TCPOptionSACKPermitted = 4 66 TCPOptionSACK = 5 67 ) 68 69 // TCPFields contains the fields of a TCP packet. It is used to describe the 70 // fields of a packet that needs to be encoded. 71 type TCPFields struct { 72 // SrcPort is the "source port" field of a TCP packet. 73 SrcPort uint16 74 75 // DstPort is the "destination port" field of a TCP packet. 76 DstPort uint16 77 78 // SeqNum is the "sequence number" field of a TCP packet. 79 SeqNum uint32 80 81 // AckNum is the "acknowledgement number" field of a TCP packet. 82 AckNum uint32 83 84 // DataOffset is the "data offset" field of a TCP packet. 85 DataOffset uint8 86 87 // Flags is the "flags" field of a TCP packet. 88 Flags uint8 89 90 // WindowSize is the "window size" field of a TCP packet. 91 WindowSize uint16 92 93 // Checksum is the "checksum" field of a TCP packet. 94 Checksum uint16 95 96 // UrgentPointer is the "urgent pointer" field of a TCP packet. 97 UrgentPointer uint16 98 } 99 100 // TCPSynOptions is used to return the parsed TCP Options in a syn 101 // segment. 102 type TCPSynOptions struct { 103 // MSS is the maximum segment size provided by the peer in the SYN. 104 MSS uint16 105 106 // WS is the window scale option provided by the peer in the SYN. 107 // 108 // Set to -1 if no window scale option was provided. 109 WS int 110 111 // TS is true if the timestamp option was provided in the syn/syn-ack. 112 TS bool 113 114 // TSVal is the value of the TSVal field in the timestamp option. 115 TSVal uint32 116 117 // TSEcr is the value of the TSEcr field in the timestamp option. 118 TSEcr uint32 119 120 // SACKPermitted is true if the SACK option was provided in the SYN/SYN-ACK. 121 SACKPermitted bool 122 } 123 124 // SACKBlock represents a single contiguous SACK block. 125 // 126 // +stateify savable 127 type SACKBlock struct { 128 // Start indicates the lowest sequence number in the block. 129 Start seqnum.Value 130 131 // End indicates the sequence number immediately following the last 132 // sequence number of this block. 133 End seqnum.Value 134 } 135 136 // Less returns true if r.Start < b.Start. 137 func (r SACKBlock) Less(b btree.Item) bool { 138 return r.Start.LessThan(b.(SACKBlock).Start) 139 } 140 141 // Contains returns true if b is completely contained in r. 142 func (r SACKBlock) Contains(b SACKBlock) bool { 143 return r.Start.LessThanEq(b.Start) && b.End.LessThanEq(r.End) 144 } 145 146 // TCPOptions are used to parse and cache the TCP segment options for a non 147 // syn/syn-ack segment. 148 // 149 // +stateify savable 150 type TCPOptions struct { 151 // TS is true if the TimeStamp option is enabled. 152 TS bool 153 154 // TSVal is the value in the TSVal field of the segment. 155 TSVal uint32 156 157 // TSEcr is the value in the TSEcr field of the segment. 158 TSEcr uint32 159 160 // SACKBlocks are the SACK blocks specified in the segment. 161 SACKBlocks []SACKBlock 162 } 163 164 // TCP represents a TCP header stored in a byte array. 165 type TCP []byte 166 167 const ( 168 // TCPMinimumSize is the minimum size of a valid TCP packet. 169 TCPMinimumSize = 20 170 171 // TCPOptionsMaximumSize is the maximum size of TCP options. 172 TCPOptionsMaximumSize = 40 173 174 // TCPHeaderMaximumSize is the maximum header size of a TCP packet. 175 TCPHeaderMaximumSize = TCPMinimumSize + TCPOptionsMaximumSize 176 177 // TCPProtocolNumber is TCP's transport protocol number. 178 TCPProtocolNumber tcpip.TransportProtocolNumber = 6 179 180 // TCPMinimumMSS is the minimum acceptable value for MSS. This is the 181 // same as the value TCP_MIN_MSS defined net/tcp.h. 182 TCPMinimumMSS = IPv4MaximumHeaderSize + TCPHeaderMaximumSize + MinIPFragmentPayloadSize - IPv4MinimumSize - TCPMinimumSize 183 184 // TCPMaximumMSS is the maximum acceptable value for MSS. 185 TCPMaximumMSS = 0xffff 186 187 // TCPDefaultMSS is the MSS value that should be used if an MSS option 188 // is not received from the peer. It's also the value returned by 189 // TCP_MAXSEG option for a socket in an unconnected state. 190 // 191 // Per RFC 1122, page 85: "If an MSS option is not received at 192 // connection setup, TCP MUST assume a default send MSS of 536." 193 TCPDefaultMSS = 536 194 ) 195 196 // SourcePort returns the "source port" field of the tcp header. 197 func (b TCP) SourcePort() uint16 { 198 return binary.BigEndian.Uint16(b[TCPSrcPortOffset:]) 199 } 200 201 // DestinationPort returns the "destination port" field of the tcp header. 202 func (b TCP) DestinationPort() uint16 { 203 return binary.BigEndian.Uint16(b[TCPDstPortOffset:]) 204 } 205 206 // SequenceNumber returns the "sequence number" field of the tcp header. 207 func (b TCP) SequenceNumber() uint32 { 208 return binary.BigEndian.Uint32(b[TCPSeqNumOffset:]) 209 } 210 211 // AckNumber returns the "ack number" field of the tcp header. 212 func (b TCP) AckNumber() uint32 { 213 return binary.BigEndian.Uint32(b[TCPAckNumOffset:]) 214 } 215 216 // DataOffset returns the "data offset" field of the tcp header. 217 func (b TCP) DataOffset() uint8 { 218 return (b[TCPDataOffset] >> 4) * 4 219 } 220 221 // Payload returns the data in the tcp packet. 222 func (b TCP) Payload() []byte { 223 return b[b.DataOffset():] 224 } 225 226 // Flags returns the flags field of the tcp header. 227 func (b TCP) Flags() uint8 { 228 return b[TCPFlagsOffset] 229 } 230 231 // WindowSize returns the "window size" field of the tcp header. 232 func (b TCP) WindowSize() uint16 { 233 return binary.BigEndian.Uint16(b[TCPWinSizeOffset:]) 234 } 235 236 // Checksum returns the "checksum" field of the tcp header. 237 func (b TCP) Checksum() uint16 { 238 return binary.BigEndian.Uint16(b[TCPChecksumOffset:]) 239 } 240 241 // SetSourcePort sets the "source port" field of the tcp header. 242 func (b TCP) SetSourcePort(port uint16) { 243 binary.BigEndian.PutUint16(b[TCPSrcPortOffset:], port) 244 } 245 246 // SetDestinationPort sets the "destination port" field of the tcp header. 247 func (b TCP) SetDestinationPort(port uint16) { 248 binary.BigEndian.PutUint16(b[TCPDstPortOffset:], port) 249 } 250 251 // SetChecksum sets the checksum field of the tcp header. 252 func (b TCP) SetChecksum(checksum uint16) { 253 binary.BigEndian.PutUint16(b[TCPChecksumOffset:], checksum) 254 } 255 256 // CalculateChecksum calculates the checksum of the tcp segment. 257 // partialChecksum is the checksum of the network-layer pseudo-header 258 // and the checksum of the segment data. 259 func (b TCP) CalculateChecksum(partialChecksum uint16) uint16 { 260 // Calculate the rest of the checksum. 261 return Checksum(b[:b.DataOffset()], partialChecksum) 262 } 263 264 // Options returns a slice that holds the unparsed TCP options in the segment. 265 func (b TCP) Options() []byte { 266 return b[TCPMinimumSize:b.DataOffset()] 267 } 268 269 // ParsedOptions returns a TCPOptions structure which parses and caches the TCP 270 // option values in the TCP segment. NOTE: Invoking this function repeatedly is 271 // expensive as it reparses the options on each invocation. 272 func (b TCP) ParsedOptions() TCPOptions { 273 return ParseTCPOptions(b.Options()) 274 } 275 276 func (b TCP) encodeSubset(seq, ack uint32, flags uint8, rcvwnd uint16) { 277 binary.BigEndian.PutUint32(b[TCPSeqNumOffset:], seq) 278 binary.BigEndian.PutUint32(b[TCPAckNumOffset:], ack) 279 b[TCPFlagsOffset] = flags 280 binary.BigEndian.PutUint16(b[TCPWinSizeOffset:], rcvwnd) 281 } 282 283 // Encode encodes all the fields of the tcp header. 284 func (b TCP) Encode(t *TCPFields) { 285 b.encodeSubset(t.SeqNum, t.AckNum, t.Flags, t.WindowSize) 286 binary.BigEndian.PutUint16(b[TCPSrcPortOffset:], t.SrcPort) 287 binary.BigEndian.PutUint16(b[TCPDstPortOffset:], t.DstPort) 288 b[TCPDataOffset] = (t.DataOffset / 4) << 4 289 binary.BigEndian.PutUint16(b[TCPChecksumOffset:], t.Checksum) 290 binary.BigEndian.PutUint16(b[TCPUrgentPtrOffset:], t.UrgentPointer) 291 } 292 293 // EncodePartial updates a subset of the fields of the tcp header. It is useful 294 // in cases when similar segments are produced. 295 func (b TCP) EncodePartial(partialChecksum, length uint16, seqnum, acknum uint32, flags byte, rcvwnd uint16) { 296 // Add the total length and "flags" field contributions to the checksum. 297 // We don't use the flags field directly from the header because it's a 298 // one-byte field with an odd offset, so it would be accounted for 299 // incorrectly by the Checksum routine. 300 tmp := make([]byte, 4) 301 binary.BigEndian.PutUint16(tmp, length) 302 binary.BigEndian.PutUint16(tmp[2:], uint16(flags)) 303 checksum := Checksum(tmp, partialChecksum) 304 305 // Encode the passed-in fields. 306 b.encodeSubset(seqnum, acknum, flags, rcvwnd) 307 308 // Add the contributions of the passed-in fields to the checksum. 309 checksum = Checksum(b[TCPSeqNumOffset:TCPSeqNumOffset+8], checksum) 310 checksum = Checksum(b[TCPWinSizeOffset:TCPWinSizeOffset+2], checksum) 311 312 // Encode the checksum. 313 b.SetChecksum(^checksum) 314 } 315 316 // ParseSynOptions parses the options received in a SYN segment and returns the 317 // relevant ones. opts should point to the option part of the TCP Header. 318 func ParseSynOptions(opts []byte, isAck bool) TCPSynOptions { 319 limit := len(opts) 320 321 synOpts := TCPSynOptions{ 322 // Per RFC 1122, page 85: "If an MSS option is not received at 323 // connection setup, TCP MUST assume a default send MSS of 536." 324 MSS: TCPDefaultMSS, 325 // If no window scale option is specified, WS in options is 326 // returned as -1; this is because the absence of the option 327 // indicates that the we cannot use window scaling on the 328 // receive end either. 329 WS: -1, 330 } 331 332 for i := 0; i < limit; { 333 switch opts[i] { 334 case TCPOptionEOL: 335 i = limit 336 case TCPOptionNOP: 337 i++ 338 case TCPOptionMSS: 339 if i+4 > limit || opts[i+1] != 4 { 340 return synOpts 341 } 342 mss := uint16(opts[i+2])<<8 | uint16(opts[i+3]) 343 if mss == 0 { 344 return synOpts 345 } 346 synOpts.MSS = mss 347 i += 4 348 349 case TCPOptionWS: 350 if i+3 > limit || opts[i+1] != 3 { 351 return synOpts 352 } 353 ws := int(opts[i+2]) 354 if ws > MaxWndScale { 355 ws = MaxWndScale 356 } 357 synOpts.WS = ws 358 i += 3 359 360 case TCPOptionTS: 361 if i+10 > limit || opts[i+1] != 10 { 362 return synOpts 363 } 364 synOpts.TSVal = binary.BigEndian.Uint32(opts[i+2:]) 365 if isAck { 366 // If the segment is a SYN-ACK then store the Timestamp Echo Reply 367 // in the segment. 368 synOpts.TSEcr = binary.BigEndian.Uint32(opts[i+6:]) 369 } 370 synOpts.TS = true 371 i += 10 372 case TCPOptionSACKPermitted: 373 if i+2 > limit || opts[i+1] != 2 { 374 return synOpts 375 } 376 synOpts.SACKPermitted = true 377 i += 2 378 379 default: 380 // We don't recognize this option, just skip over it. 381 if i+2 > limit { 382 return synOpts 383 } 384 l := int(opts[i+1]) 385 // If the length is incorrect or if l+i overflows the 386 // total options length then return false. 387 if l < 2 || i+l > limit { 388 return synOpts 389 } 390 i += l 391 } 392 } 393 394 return synOpts 395 } 396 397 // ParseTCPOptions extracts and stores all known options in the provided byte 398 // slice in a TCPOptions structure. 399 func ParseTCPOptions(b []byte) TCPOptions { 400 opts := TCPOptions{} 401 limit := len(b) 402 for i := 0; i < limit; { 403 switch b[i] { 404 case TCPOptionEOL: 405 i = limit 406 case TCPOptionNOP: 407 i++ 408 case TCPOptionTS: 409 if i+10 > limit || (b[i+1] != 10) { 410 return opts 411 } 412 opts.TS = true 413 opts.TSVal = binary.BigEndian.Uint32(b[i+2:]) 414 opts.TSEcr = binary.BigEndian.Uint32(b[i+6:]) 415 i += 10 416 case TCPOptionSACK: 417 if i+2 > limit { 418 // Malformed SACK block, just return and stop parsing. 419 return opts 420 } 421 sackOptionLen := int(b[i+1]) 422 if i+sackOptionLen > limit || (sackOptionLen-2)%8 != 0 { 423 // Malformed SACK block, just return and stop parsing. 424 return opts 425 } 426 numBlocks := (sackOptionLen - 2) / 8 427 opts.SACKBlocks = []SACKBlock{} 428 for j := 0; j < numBlocks; j++ { 429 start := binary.BigEndian.Uint32(b[i+2+j*8:]) 430 end := binary.BigEndian.Uint32(b[i+2+j*8+4:]) 431 opts.SACKBlocks = append(opts.SACKBlocks, SACKBlock{ 432 Start: seqnum.Value(start), 433 End: seqnum.Value(end), 434 }) 435 } 436 i += sackOptionLen 437 default: 438 // We don't recognize this option, just skip over it. 439 if i+2 > limit { 440 return opts 441 } 442 l := int(b[i+1]) 443 // If the length is incorrect or if l+i overflows the 444 // total options length then return false. 445 if l < 2 || i+l > limit { 446 return opts 447 } 448 i += l 449 } 450 } 451 return opts 452 } 453 454 // EncodeMSSOption encodes the MSS TCP option with the provided MSS values in 455 // the supplied buffer. If the provided buffer is not large enough then it just 456 // returns without encoding anything. It returns the number of bytes written to 457 // the provided buffer. 458 func EncodeMSSOption(mss uint32, b []byte) int { 459 // mssOptionSize is the number of bytes in a valid MSS option. 460 const mssOptionSize = 4 461 462 if len(b) < mssOptionSize { 463 return 0 464 } 465 b[0], b[1], b[2], b[3] = TCPOptionMSS, mssOptionSize, byte(mss>>8), byte(mss) 466 return mssOptionSize 467 } 468 469 // EncodeWSOption encodes the WS TCP option with the WS value in the 470 // provided buffer. If the provided buffer is not large enough then it just 471 // returns without encoding anything. It returns the number of bytes written to 472 // the provided buffer. 473 func EncodeWSOption(ws int, b []byte) int { 474 if len(b) < 3 { 475 return 0 476 } 477 b[0], b[1], b[2] = TCPOptionWS, 3, uint8(ws) 478 return int(b[1]) 479 } 480 481 // EncodeTSOption encodes the provided tsVal and tsEcr values as a TCP timestamp 482 // option into the provided buffer. If the buffer is smaller than expected it 483 // just returns without encoding anything. It returns the number of bytes 484 // written to the provided buffer. 485 func EncodeTSOption(tsVal, tsEcr uint32, b []byte) int { 486 if len(b) < 10 { 487 return 0 488 } 489 b[0], b[1] = TCPOptionTS, 10 490 binary.BigEndian.PutUint32(b[2:], tsVal) 491 binary.BigEndian.PutUint32(b[6:], tsEcr) 492 return int(b[1]) 493 } 494 495 // EncodeSACKPermittedOption encodes a SACKPermitted option into the provided 496 // buffer. If the buffer is smaller than required it just returns without 497 // encoding anything. It returns the number of bytes written to the provided 498 // buffer. 499 func EncodeSACKPermittedOption(b []byte) int { 500 if len(b) < 2 { 501 return 0 502 } 503 504 b[0], b[1] = TCPOptionSACKPermitted, 2 505 return int(b[1]) 506 } 507 508 // EncodeSACKBlocks encodes the provided SACK blocks as a TCP SACK option block 509 // in the provided slice. It tries to fit in as many blocks as possible based on 510 // number of bytes available in the provided buffer. It returns the number of 511 // bytes written to the provided buffer. 512 func EncodeSACKBlocks(sackBlocks []SACKBlock, b []byte) int { 513 if len(sackBlocks) == 0 { 514 return 0 515 } 516 l := len(sackBlocks) 517 if l > TCPMaxSACKBlocks { 518 l = TCPMaxSACKBlocks 519 } 520 if ll := (len(b) - 2) / 8; ll < l { 521 l = ll 522 } 523 if l == 0 { 524 // There is not enough space in the provided buffer to add 525 // any SACK blocks. 526 return 0 527 } 528 b[0] = TCPOptionSACK 529 b[1] = byte(l*8 + 2) 530 for i := 0; i < l; i++ { 531 binary.BigEndian.PutUint32(b[i*8+2:], uint32(sackBlocks[i].Start)) 532 binary.BigEndian.PutUint32(b[i*8+6:], uint32(sackBlocks[i].End)) 533 } 534 return int(b[1]) 535 } 536 537 // EncodeNOP adds an explicit NOP to the option list. 538 func EncodeNOP(b []byte) int { 539 if len(b) == 0 { 540 return 0 541 } 542 b[0] = TCPOptionNOP 543 return 1 544 } 545 546 // AddTCPOptionPadding adds the required number of TCPOptionNOP to quad align 547 // the option buffer. It adds padding bytes after the offset specified and 548 // returns the number of padding bytes added. The passed in options slice 549 // must have space for the padding bytes. 550 func AddTCPOptionPadding(options []byte, offset int) int { 551 paddingToAdd := -offset & 3 552 // Now add any padding bytes that might be required to quad align the 553 // options. 554 for i := offset; i < offset+paddingToAdd; i++ { 555 options[i] = TCPOptionNOP 556 } 557 return paddingToAdd 558 }