github.com/koomox/wireguard-go@v0.0.0-20230722134753-17a50b2f22a3/tun/tcp_offload_linux.go

/* SPDX-License-Identifier: MIT
 *
 * Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
 */

package tun

import (
	"bytes"
	"encoding/binary"
	"errors"
	"io"
	"unsafe"

	"github.com/koomox/wireguard-go/conn"
	"golang.org/x/sys/unix"
)

const tcpFlagsOffset = 13

const (
	tcpFlagFIN uint8 = 0x01
	tcpFlagPSH uint8 = 0x08
	tcpFlagACK uint8 = 0x10
)

// virtioNetHdr is defined in the kernel in include/uapi/linux/virtio_net.h. The
// kernel symbol is virtio_net_hdr.
type virtioNetHdr struct {
	flags      uint8
	gsoType    uint8
	hdrLen     uint16
	gsoSize    uint16
	csumStart  uint16
	csumOffset uint16
}

func (v *virtioNetHdr) decode(b []byte) error {
	if len(b) < virtioNetHdrLen {
		return io.ErrShortBuffer
	}
	copy(unsafe.Slice((*byte)(unsafe.Pointer(v)), virtioNetHdrLen), b[:virtioNetHdrLen])
	return nil
}

func (v *virtioNetHdr) encode(b []byte) error {
	if len(b) < virtioNetHdrLen {
		return io.ErrShortBuffer
	}
	copy(b[:virtioNetHdrLen], unsafe.Slice((*byte)(unsafe.Pointer(v)), virtioNetHdrLen))
	return nil
}

const (
	// virtioNetHdrLen is the length in bytes of virtioNetHdr. This matches the
	// shape of the C ABI for its kernel counterpart -- sizeof(virtio_net_hdr).
	virtioNetHdrLen = int(unsafe.Sizeof(virtioNetHdr{}))
)

// flowKey represents the key for a flow.
type flowKey struct {
	srcAddr, dstAddr [16]byte
	srcPort, dstPort uint16
	rxAck            uint32 // varying ack values should not be coalesced. Treat them as separate flows.
}

// tcpGROTable holds flow and coalescing information for the purposes of GRO.
type tcpGROTable struct {
	itemsByFlow map[flowKey][]tcpGROItem
	itemsPool   [][]tcpGROItem
}

func newTCPGROTable() *tcpGROTable {
	t := &tcpGROTable{
		itemsByFlow: make(map[flowKey][]tcpGROItem, conn.IdealBatchSize),
		itemsPool:   make([][]tcpGROItem, conn.IdealBatchSize),
	}
	for i := range t.itemsPool {
		t.itemsPool[i] = make([]tcpGROItem, 0, conn.IdealBatchSize)
	}
	return t
}

// newFlowKey builds a flowKey from the packet's IP addresses, TCP ports, and
// ACK number. srcAddr and dstAddr are byte offsets of the address fields
// within pkt.
func newFlowKey(pkt []byte, srcAddr, dstAddr, tcphOffset int) flowKey {
	key := flowKey{}
	addrSize := dstAddr - srcAddr
	copy(key.srcAddr[:], pkt[srcAddr:dstAddr])
	copy(key.dstAddr[:], pkt[dstAddr:dstAddr+addrSize])
	key.srcPort = binary.BigEndian.Uint16(pkt[tcphOffset:])
	key.dstPort = binary.BigEndian.Uint16(pkt[tcphOffset+2:])
	key.rxAck = binary.BigEndian.Uint32(pkt[tcphOffset+8:])
	return key
}

// lookupOrInsert looks up a flow for the provided packet and metadata,
// returning the packets found for the flow, or inserting a new one if none
// is found.
func (t *tcpGROTable) lookupOrInsert(pkt []byte, srcAddrOffset, dstAddrOffset, tcphOffset, tcphLen, bufsIndex int) ([]tcpGROItem, bool) {
	key := newFlowKey(pkt, srcAddrOffset, dstAddrOffset, tcphOffset)
	items, ok := t.itemsByFlow[key]
	if ok {
		return items, ok
	}
	// TODO: insert() performs another map lookup. This could be rearranged to avoid.
	t.insert(pkt, srcAddrOffset, dstAddrOffset, tcphOffset, tcphLen, bufsIndex)
	return nil, false
}
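// For illustration, a minimal sketch of how the table is driven per packet
// vector (hypothetical offsets for an IPv4 packet without options; the real
// callers are tcpGRO and handleGRO below):
//
//	table := newTCPGROTable()
//	// addresses at offset 12, TCP header at offset 20, 20-byte TCP header
//	items, existing := table.lookupOrInsert(pkt, 12, 16, 20, 20, 0)
//	if !existing {
//		// pkt became the first item of a new flow; nothing to coalesce yet.
//	}
//	table.reset() // recycle item slices between vectors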
// insert an item in the table for the provided packet and packet metadata.
func (t *tcpGROTable) insert(pkt []byte, srcAddrOffset, dstAddrOffset, tcphOffset, tcphLen, bufsIndex int) {
	key := newFlowKey(pkt, srcAddrOffset, dstAddrOffset, tcphOffset)
	item := tcpGROItem{
		key:       key,
		bufsIndex: uint16(bufsIndex),
		gsoSize:   uint16(len(pkt[tcphOffset+tcphLen:])),
		iphLen:    uint8(tcphOffset),
		tcphLen:   uint8(tcphLen),
		sentSeq:   binary.BigEndian.Uint32(pkt[tcphOffset+4:]),
		pshSet:    pkt[tcphOffset+tcpFlagsOffset]&tcpFlagPSH != 0,
	}
	items, ok := t.itemsByFlow[key]
	if !ok {
		items = t.newItems()
	}
	items = append(items, item)
	t.itemsByFlow[key] = items
}

func (t *tcpGROTable) updateAt(item tcpGROItem, i int) {
	items := t.itemsByFlow[item.key]
	items[i] = item
}

func (t *tcpGROTable) deleteAt(key flowKey, i int) {
	items := t.itemsByFlow[key]
	items = append(items[:i], items[i+1:]...)
	t.itemsByFlow[key] = items
}

// tcpGROItem represents bookkeeping data for a TCP packet during the lifetime
// of a GRO evaluation across a vector of packets.
type tcpGROItem struct {
	key       flowKey
	sentSeq   uint32 // the sequence number
	bufsIndex uint16 // the index into the original bufs slice
	numMerged uint16 // the number of packets merged into this item
	gsoSize   uint16 // payload size
	iphLen    uint8  // ip header len
	tcphLen   uint8  // tcp header len
	pshSet    bool   // psh flag is set
}

func (t *tcpGROTable) newItems() []tcpGROItem {
	var items []tcpGROItem
	items, t.itemsPool = t.itemsPool[len(t.itemsPool)-1], t.itemsPool[:len(t.itemsPool)-1]
	return items
}

func (t *tcpGROTable) reset() {
	for k, items := range t.itemsByFlow {
		items = items[:0]
		t.itemsPool = append(t.itemsPool, items)
		delete(t.itemsByFlow, k)
	}
}
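// A worked example of the sequence-number adjacency rule applied by
// tcpPacketsCanCoalesce below (hypothetical numbers): an item with
// sentSeq=1460, gsoSize=1460, and numMerged=0 covers payload bytes
// [1460, 2920). A packet with seq=2920 and a payload of at most 1460 bytes
// is an append candidate; a packet with seq=0 and a payload of exactly 1460
// bytes is a prepend candidate, since 0+1460 lands on the item's sentSeq.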
// canCoalesce represents the outcome of checking if two TCP packets are
// candidates for coalescing.
type canCoalesce int

const (
	coalescePrepend     canCoalesce = -1
	coalesceUnavailable canCoalesce = 0
	coalesceAppend      canCoalesce = 1
)

// tcpPacketsCanCoalesce evaluates if pkt can be coalesced with the packet
// described by item. This function makes considerations that match the kernel's
// GRO self tests, which can be found in tools/testing/selftests/net/gro.c.
func tcpPacketsCanCoalesce(pkt []byte, iphLen, tcphLen uint8, seq uint32, pshSet bool, gsoSize uint16, item tcpGROItem, bufs [][]byte, bufsOffset int) canCoalesce {
	pktTarget := bufs[item.bufsIndex][bufsOffset:]
	if tcphLen != item.tcphLen {
		// cannot coalesce with unequal tcp options len
		return coalesceUnavailable
	}
	if tcphLen > 20 {
		if !bytes.Equal(pkt[iphLen+20:iphLen+tcphLen], pktTarget[item.iphLen+20:item.iphLen+tcphLen]) {
			// cannot coalesce with unequal tcp options
			return coalesceUnavailable
		}
	}
	if pkt[0]>>4 == 6 {
		if pkt[0] != pktTarget[0] || pkt[1]>>4 != pktTarget[1]>>4 {
			// cannot coalesce with unequal Traffic class values
			return coalesceUnavailable
		}
		if pkt[7] != pktTarget[7] {
			// cannot coalesce with unequal Hop limit values
			return coalesceUnavailable
		}
	} else {
		if pkt[1] != pktTarget[1] {
			// cannot coalesce with unequal ToS values
			return coalesceUnavailable
		}
		if pkt[6]>>5 != pktTarget[6]>>5 {
			// cannot coalesce with unequal DF or reserved bits. MF is checked
			// further up the stack.
			return coalesceUnavailable
		}
		if pkt[8] != pktTarget[8] {
			// cannot coalesce with unequal TTL values
			return coalesceUnavailable
		}
	}
	// seq adjacency
	lhsLen := item.gsoSize
	lhsLen += item.numMerged * item.gsoSize
	if seq == item.sentSeq+uint32(lhsLen) { // pkt aligns following item from a seq num perspective
		if item.pshSet {
			// We cannot append to a segment that has the PSH flag set, PSH
			// can only be set on the final segment in a reassembled group.
			return coalesceUnavailable
		}
		if len(pktTarget[iphLen+tcphLen:])%int(item.gsoSize) != 0 {
			// A smaller than gsoSize packet has been appended previously.
			// Nothing can come after a smaller packet on the end.
			return coalesceUnavailable
		}
		if gsoSize > item.gsoSize {
			// We cannot have a larger packet following a smaller one.
			return coalesceUnavailable
		}
		return coalesceAppend
	} else if seq+uint32(gsoSize) == item.sentSeq { // pkt aligns in front of item from a seq num perspective
		if pshSet {
			// We cannot prepend with a segment that has the PSH flag set, PSH
			// can only be set on the final segment in a reassembled group.
			return coalesceUnavailable
		}
		if gsoSize < item.gsoSize {
			// We cannot have a larger packet following a smaller one.
			return coalesceUnavailable
		}
		if gsoSize > item.gsoSize && item.numMerged > 0 {
			// There's at least one previous merge, and we're larger than all
			// previous. This would put multiple smaller packets on the end.
			return coalesceUnavailable
		}
		return coalescePrepend
	}
	return coalesceUnavailable
}

func tcpChecksumValid(pkt []byte, iphLen uint8, isV6 bool) bool {
	srcAddrAt := ipv4SrcAddrOffset
	addrSize := 4
	if isV6 {
		srcAddrAt = ipv6SrcAddrOffset
		addrSize = 16
	}
	tcpTotalLen := uint16(len(pkt) - int(iphLen))
	tcpCSumNoFold := pseudoHeaderChecksumNoFold(unix.IPPROTO_TCP, pkt[srcAddrAt:srcAddrAt+addrSize], pkt[srcAddrAt+addrSize:srcAddrAt+addrSize*2], tcpTotalLen)
	return ^checksum(pkt[iphLen:], tcpCSumNoFold) == 0
}
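// The validity test above relies on standard Internet checksum arithmetic:
// for a correctly checksummed segment, the ones-complement sum of the TCP
// header, payload, and pseudo header equals 0xffff, so complementing the
// folded sum yields zero. pseudoHeaderChecksumNoFold and checksum are
// defined elsewhere in this package.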
// coalesceResult represents the result of attempting to coalesce two TCP
// packets.
type coalesceResult int

const (
	coalesceInsufficientCap coalesceResult = 0
	coalescePSHEnding       coalesceResult = 1
	coalesceItemInvalidCSum coalesceResult = 2
	coalescePktInvalidCSum  coalesceResult = 3
	coalesceSuccess         coalesceResult = 4
)

// coalesceTCPPackets attempts to coalesce pkt with the packet described by
// item, returning the outcome. This function may swap bufs elements in the
// event of a prepend as item's bufs index is already being tracked for writing
// to a Device.
func coalesceTCPPackets(mode canCoalesce, pkt []byte, pktBuffsIndex int, gsoSize uint16, seq uint32, pshSet bool, item *tcpGROItem, bufs [][]byte, bufsOffset int, isV6 bool) coalesceResult {
	var pktHead []byte // the packet that will end up at the front
	headersLen := item.iphLen + item.tcphLen
	coalescedLen := len(bufs[item.bufsIndex][bufsOffset:]) + len(pkt) - int(headersLen)

	// Copy data
	if mode == coalescePrepend {
		pktHead = pkt
		if cap(pkt)-bufsOffset < coalescedLen {
			// We don't want to allocate a new underlying array if capacity is
			// too small.
			return coalesceInsufficientCap
		}
		if pshSet {
			return coalescePSHEnding
		}
		if item.numMerged == 0 {
			if !tcpChecksumValid(bufs[item.bufsIndex][bufsOffset:], item.iphLen, isV6) {
				return coalesceItemInvalidCSum
			}
		}
		if !tcpChecksumValid(pkt, item.iphLen, isV6) {
			return coalescePktInvalidCSum
		}
		item.sentSeq = seq
		extendBy := coalescedLen - len(pktHead)
		bufs[pktBuffsIndex] = append(bufs[pktBuffsIndex], make([]byte, extendBy)...)
		copy(bufs[pktBuffsIndex][bufsOffset+len(pkt):], bufs[item.bufsIndex][bufsOffset+int(headersLen):])
		// Flip the slice headers in bufs as part of prepend. The index of item
		// is already being tracked for writing.
		bufs[item.bufsIndex], bufs[pktBuffsIndex] = bufs[pktBuffsIndex], bufs[item.bufsIndex]
	} else {
		pktHead = bufs[item.bufsIndex][bufsOffset:]
		if cap(pktHead)-bufsOffset < coalescedLen {
			// We don't want to allocate a new underlying array if capacity is
			// too small.
			return coalesceInsufficientCap
		}
		if item.numMerged == 0 {
			if !tcpChecksumValid(bufs[item.bufsIndex][bufsOffset:], item.iphLen, isV6) {
				return coalesceItemInvalidCSum
			}
		}
		if !tcpChecksumValid(pkt, item.iphLen, isV6) {
			return coalescePktInvalidCSum
		}
		if pshSet {
			// We are appending a segment with PSH set.
			item.pshSet = pshSet
			pktHead[item.iphLen+tcpFlagsOffset] |= tcpFlagPSH
		}
		extendBy := len(pkt) - int(headersLen)
		bufs[item.bufsIndex] = append(bufs[item.bufsIndex], make([]byte, extendBy)...)
		copy(bufs[item.bufsIndex][bufsOffset+len(pktHead):], pkt[headersLen:])
	}

	if gsoSize > item.gsoSize {
		item.gsoSize = gsoSize
	}
	hdr := virtioNetHdr{
		flags:      unix.VIRTIO_NET_HDR_F_NEEDS_CSUM, // this turns into CHECKSUM_PARTIAL in the skb
		hdrLen:     uint16(headersLen),
		gsoSize:    item.gsoSize,
		csumStart:  uint16(item.iphLen),
		csumOffset: 16, // offset of the checksum field within the TCP header
	}

	// Recalculate the total len (IPv4) or payload len (IPv6). Recalculate the
	// (IPv4) header checksum.
	if isV6 {
		hdr.gsoType = unix.VIRTIO_NET_HDR_GSO_TCPV6
		binary.BigEndian.PutUint16(pktHead[4:], uint16(coalescedLen)-uint16(item.iphLen)) // set new payload len
	} else {
		hdr.gsoType = unix.VIRTIO_NET_HDR_GSO_TCPV4
		pktHead[10], pktHead[11] = 0, 0                               // clear checksum field
		binary.BigEndian.PutUint16(pktHead[2:], uint16(coalescedLen)) // set new total length
		iphCSum := ^checksum(pktHead[:item.iphLen], 0)                // compute checksum
		binary.BigEndian.PutUint16(pktHead[10:], iphCSum)             // set checksum field
	}
	hdr.encode(bufs[item.bufsIndex][bufsOffset-virtioNetHdrLen:])

	// Calculate the pseudo header checksum and place it at the TCP checksum
	// offset. Downstream checksum offloading will combine this with computation
	// of the tcp header and payload checksum.
	addrLen := 4
	addrOffset := ipv4SrcAddrOffset
	if isV6 {
		addrLen = 16
		addrOffset = ipv6SrcAddrOffset
	}
	srcAddrAt := bufsOffset + addrOffset
	srcAddr := bufs[item.bufsIndex][srcAddrAt : srcAddrAt+addrLen]
	dstAddr := bufs[item.bufsIndex][srcAddrAt+addrLen : srcAddrAt+addrLen*2]
	psum := pseudoHeaderChecksumNoFold(unix.IPPROTO_TCP, srcAddr, dstAddr, uint16(coalescedLen-int(item.iphLen)))
	binary.BigEndian.PutUint16(pktHead[hdr.csumStart+hdr.csumOffset:], checksum([]byte{}, psum))

	item.numMerged++
	return coalesceSuccess
}

const (
	ipv4FlagMoreFragments uint8 = 0x20
)

const (
	ipv4SrcAddrOffset = 12
	ipv6SrcAddrOffset = 8
	maxUint16         = 1<<16 - 1
)
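// For a typical coalesced IPv4 flow without IP or TCP options, the header
// written by coalesceTCPPackets above would read (illustrative values):
//
//	virtioNetHdr{
//		flags:      unix.VIRTIO_NET_HDR_F_NEEDS_CSUM,
//		gsoType:    unix.VIRTIO_NET_HDR_GSO_TCPV4,
//		hdrLen:     40,   // 20-byte IPv4 + 20-byte TCP
//		gsoSize:    1460, // the MSS-sized payload of each merged segment
//		csumStart:  20,   // TCP header begins after the IPv4 header
//		csumOffset: 16,   // checksum field within the TCP header
//	}
//
// The receiving stack treats the merged buffer as a single GSO packet and
// resegments and checksums it as needed.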
// tcpGRO evaluates the TCP packet at pktI in bufs for coalescing with
// existing packets tracked in table. It will return false when pktI is not
// coalesced, otherwise true. This indicates to the caller if bufs[pktI]
// should be written to the Device.
func tcpGRO(bufs [][]byte, offset int, pktI int, table *tcpGROTable, isV6 bool) (pktCoalesced bool) {
	pkt := bufs[pktI][offset:]
	if len(pkt) > maxUint16 {
		// A valid IPv4 or IPv6 packet will never exceed this.
		return false
	}
	iphLen := int((pkt[0] & 0x0F) * 4)
	if isV6 {
		iphLen = 40
		ipv6HPayloadLen := int(binary.BigEndian.Uint16(pkt[4:]))
		if ipv6HPayloadLen != len(pkt)-iphLen {
			return false
		}
	} else {
		totalLen := int(binary.BigEndian.Uint16(pkt[2:]))
		if totalLen != len(pkt) {
			return false
		}
	}
	if len(pkt) < iphLen {
		return false
	}
	tcphLen := int((pkt[iphLen+12] >> 4) * 4)
	if tcphLen < 20 || tcphLen > 60 {
		return false
	}
	if len(pkt) < iphLen+tcphLen {
		return false
	}
	if !isV6 {
		if pkt[6]&ipv4FlagMoreFragments != 0 || pkt[6]<<3 != 0 || pkt[7] != 0 {
			// no GRO support for fragmented segments for now
			return false
		}
	}
	tcpFlags := pkt[iphLen+tcpFlagsOffset]
	var pshSet bool
	// not a candidate if any non-ACK flags (except PSH+ACK) are set
	if tcpFlags != tcpFlagACK {
		if tcpFlags != tcpFlagACK|tcpFlagPSH {
			return false
		}
		pshSet = true
	}
	gsoSize := uint16(len(pkt) - tcphLen - iphLen)
	// not a candidate if payload len is 0
	if gsoSize < 1 {
		return false
	}
	seq := binary.BigEndian.Uint32(pkt[iphLen+4:])
	srcAddrOffset := ipv4SrcAddrOffset
	addrLen := 4
	if isV6 {
		srcAddrOffset = ipv6SrcAddrOffset
		addrLen = 16
	}
	items, existing := table.lookupOrInsert(pkt, srcAddrOffset, srcAddrOffset+addrLen, iphLen, tcphLen, pktI)
	if !existing {
		return false
	}
	for i := len(items) - 1; i >= 0; i-- {
		// In the best case, where packets arrive in order, iterating in
		// reverse is more efficient if there are multiple items for a given
		// flow. This also enables a natural table.deleteAt() in the
		// coalesceItemInvalidCSum case without the need for index tracking.
		// This algorithm makes a best effort to coalesce in the event of
		// unordered packets, where pkt may land anywhere in items from a
		// sequence number perspective, however once an item is inserted into
		// the table it is never compared against other items later.
		item := items[i]
		can := tcpPacketsCanCoalesce(pkt, uint8(iphLen), uint8(tcphLen), seq, pshSet, gsoSize, item, bufs, offset)
		if can != coalesceUnavailable {
			result := coalesceTCPPackets(can, pkt, pktI, gsoSize, seq, pshSet, &item, bufs, offset, isV6)
			switch result {
			case coalesceSuccess:
				table.updateAt(item, i)
				return true
			case coalesceItemInvalidCSum:
				// delete the item with an invalid csum
				table.deleteAt(item.key, i)
			case coalescePktInvalidCSum:
				// no point in inserting an item that we can't coalesce
				return false
			default:
			}
		}
	}
	// failed to coalesce with any other packets; store the item in the flow
	table.insert(pkt, srcAddrOffset, srcAddrOffset+addrLen, iphLen, tcphLen, pktI)
	return false
}
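// The two predicates below gate GRO cheaply from the first bytes of a
// packet: an IPv4 candidate must start 0x45 (version 4, IHL 5, i.e. no IP
// options) with protocol byte 9 equal to IPPROTO_TCP; an IPv6 candidate
// must have version nibble 6 with next header byte 6 (IPPROTO_TCP), which
// also rules out extension headers.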
// isTCP4NoIPOptions reports whether b begins with a minimal-length IPv4
// header (no IP options) carrying TCP.
func isTCP4NoIPOptions(b []byte) bool {
	if len(b) < 40 {
		return false
	}
	if b[0]>>4 != 4 {
		return false
	}
	if b[0]&0x0F != 5 {
		return false
	}
	if b[9] != unix.IPPROTO_TCP {
		return false
	}
	return true
}

// isTCP6NoEH reports whether b begins with an IPv6 header carrying TCP with
// no extension headers.
func isTCP6NoEH(b []byte) bool {
	if len(b) < 60 {
		return false
	}
	if b[0]>>4 != 6 {
		return false
	}
	if b[6] != unix.IPPROTO_TCP {
		return false
	}
	return true
}

// handleGRO evaluates bufs for GRO, and writes the indices of the resulting
// packets into toWrite. toWrite, tcp4Table, and tcp6Table should initially be
// empty (but non-nil), and are passed in to save allocs as the caller may reset
// and recycle them across vectors of packets.
func handleGRO(bufs [][]byte, offset int, tcp4Table, tcp6Table *tcpGROTable, toWrite *[]int) error {
	for i := range bufs {
		if offset < virtioNetHdrLen || offset > len(bufs[i])-1 {
			return errors.New("invalid offset")
		}
		var coalesced bool
		switch {
		case isTCP4NoIPOptions(bufs[i][offset:]): // ipv4 packets w/IP options do not coalesce
			coalesced = tcpGRO(bufs, offset, i, tcp4Table, false)
		case isTCP6NoEH(bufs[i][offset:]): // ipv6 packets w/extension headers do not coalesce
			coalesced = tcpGRO(bufs, offset, i, tcp6Table, true)
		}
		if !coalesced {
			hdr := virtioNetHdr{}
			err := hdr.encode(bufs[i][offset-virtioNetHdrLen:])
			if err != nil {
				return err
			}
			*toWrite = append(*toWrite, i)
		}
	}
	return nil
}
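// A minimal sketch of driving handleGRO for one vector of outbound packets
// (hypothetical setup; in this package the device Write path performs the
// equivalent steps):
//
//	tcp4Table.reset()
//	tcp6Table.reset()
//	toWrite = toWrite[:0]
//	if err := handleGRO(bufs, offset, tcp4Table, tcp6Table, &toWrite); err != nil {
//		return 0, err
//	}
//	for _, idx := range toWrite {
//		// write bufs[idx], including its virtio-net header, to the device
//	}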
// tcpTSO splits packets from in into outBuffs, writing the size of each
// element into sizes. It returns the number of buffers populated, and an
// error if one occurred.
func tcpTSO(in []byte, hdr virtioNetHdr, outBuffs [][]byte, sizes []int, outOffset int) (int, error) {
	iphLen := int(hdr.csumStart)
	srcAddrOffset := ipv6SrcAddrOffset
	addrLen := 16
	if hdr.gsoType == unix.VIRTIO_NET_HDR_GSO_TCPV4 {
		in[10], in[11] = 0, 0 // clear ipv4 header checksum
		srcAddrOffset = ipv4SrcAddrOffset
		addrLen = 4
	}
	tcpCSumAt := int(hdr.csumStart + hdr.csumOffset)
	in[tcpCSumAt], in[tcpCSumAt+1] = 0, 0 // clear tcp checksum
	firstTCPSeqNum := binary.BigEndian.Uint32(in[hdr.csumStart+4:])
	nextSegmentDataAt := int(hdr.hdrLen)
	i := 0
	for ; nextSegmentDataAt < len(in); i++ {
		if i == len(outBuffs) {
			return i - 1, ErrTooManySegments
		}
		nextSegmentEnd := nextSegmentDataAt + int(hdr.gsoSize)
		if nextSegmentEnd > len(in) {
			nextSegmentEnd = len(in)
		}
		segmentDataLen := nextSegmentEnd - nextSegmentDataAt
		totalLen := int(hdr.hdrLen) + segmentDataLen
		sizes[i] = totalLen
		out := outBuffs[i][outOffset:]

		copy(out, in[:iphLen])
		if hdr.gsoType == unix.VIRTIO_NET_HDR_GSO_TCPV4 {
			// For IPv4 we are responsible for incrementing the ID field,
			// updating the total len field, and recalculating the header
			// checksum.
			if i > 0 {
				id := binary.BigEndian.Uint16(out[4:])
				id += uint16(i)
				binary.BigEndian.PutUint16(out[4:], id)
			}
			binary.BigEndian.PutUint16(out[2:], uint16(totalLen))
			ipv4CSum := ^checksum(out[:iphLen], 0)
			binary.BigEndian.PutUint16(out[10:], ipv4CSum)
		} else {
			// For IPv6 we are responsible for updating the payload length field.
			binary.BigEndian.PutUint16(out[4:], uint16(totalLen-iphLen))
		}

		// TCP header
		copy(out[hdr.csumStart:hdr.hdrLen], in[hdr.csumStart:hdr.hdrLen])
		tcpSeq := firstTCPSeqNum + uint32(hdr.gsoSize*uint16(i))
		binary.BigEndian.PutUint32(out[hdr.csumStart+4:], tcpSeq)
		if nextSegmentEnd != len(in) {
			// FIN and PSH should only be set on last segment
			clearFlags := tcpFlagFIN | tcpFlagPSH
			out[hdr.csumStart+tcpFlagsOffset] &^= clearFlags
		}

		// payload
		copy(out[hdr.hdrLen:], in[nextSegmentDataAt:nextSegmentEnd])

		// TCP checksum
		tcpHLen := int(hdr.hdrLen - hdr.csumStart)
		tcpLenForPseudo := uint16(tcpHLen + segmentDataLen)
		tcpCSumNoFold := pseudoHeaderChecksumNoFold(unix.IPPROTO_TCP, in[srcAddrOffset:srcAddrOffset+addrLen], in[srcAddrOffset+addrLen:srcAddrOffset+addrLen*2], tcpLenForPseudo)
		tcpCSum := ^checksum(out[hdr.csumStart:totalLen], tcpCSumNoFold)
		binary.BigEndian.PutUint16(out[hdr.csumStart+hdr.csumOffset:], tcpCSum)

		nextSegmentDataAt += int(hdr.gsoSize)
	}
	return i, nil
}

// gsoNoneChecksum computes the checksum for a packet with no GSO applied,
// folding in the partial sum (typically the pseudo-header checksum) already
// stored at the checksum offset.
func gsoNoneChecksum(in []byte, cSumStart, cSumOffset uint16) error {
	cSumAt := cSumStart + cSumOffset
	// The initial value at the checksum offset should be summed with the
	// checksum we compute. This is typically the pseudo-header checksum.
	initial := binary.BigEndian.Uint16(in[cSumAt:])
	in[cSumAt], in[cSumAt+1] = 0, 0
	binary.BigEndian.PutUint16(in[cSumAt:], ^checksum(in[cSumStart:], uint64(initial)))
	return nil
}
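// Worked example for tcpTSO above (hypothetical sizes): with hdr.hdrLen=40
// (20-byte IPv4 + 20-byte TCP) and hdr.gsoSize=1460, a 4420-byte input
// carries 4380 payload bytes and splits into three segments of
// totalLen=1500 each, with sequence numbers firstTCPSeqNum,
// firstTCPSeqNum+1460, and firstTCPSeqNum+2920. FIN and PSH are cleared on
// all but the final segment, and the IPv4 ID field increments per segment.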