/* SPDX-License-Identifier: MIT
 *
 * Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
 */

package tun

import (
	"bytes"
	"encoding/binary"
	"errors"
	"io"
	"unsafe"

	"golang.org/x/sys/unix"

	"github.com/forest33/wtun/conn"
)

// tcpFlagsOffset is the byte offset of the flags byte within a TCP header
// (the low 8 flag bits at offset 13).
const tcpFlagsOffset = 13

// TCP flag bits within the flags byte at tcpFlagsOffset.
const (
	tcpFlagFIN uint8 = 0x01
	tcpFlagPSH uint8 = 0x08
	tcpFlagACK uint8 = 0x10
)

// virtioNetHdr is defined in the kernel in include/uapi/linux/virtio_net.h. The
// kernel symbol is virtio_net_hdr.
//
// Field order and widths must match the kernel struct exactly: decode/encode
// below copy this struct's raw memory, so any layout change breaks the ABI.
type virtioNetHdr struct {
	flags      uint8
	gsoType    uint8
	hdrLen     uint16
	gsoSize    uint16
	csumStart  uint16
	csumOffset uint16
}

// decode fills v from the first virtioNetHdrLen bytes of b by copying them
// directly over v's memory. Returns io.ErrShortBuffer if b is too small.
// NOTE: fields are copied in native byte order, matching the kernel ABI.
func (v *virtioNetHdr) decode(b []byte) error {
	if len(b) < virtioNetHdrLen {
		return io.ErrShortBuffer
	}
	copy(unsafe.Slice((*byte)(unsafe.Pointer(v)), virtioNetHdrLen), b[:virtioNetHdrLen])
	return nil
}

// encode writes v's raw memory into the first virtioNetHdrLen bytes of b.
// Returns io.ErrShortBuffer if b is too small. Inverse of decode.
func (v *virtioNetHdr) encode(b []byte) error {
	if len(b) < virtioNetHdrLen {
		return io.ErrShortBuffer
	}
	copy(b[:virtioNetHdrLen], unsafe.Slice((*byte)(unsafe.Pointer(v)), virtioNetHdrLen))
	return nil
}

const (
	// virtioNetHdrLen is the length in bytes of virtioNetHdr. This matches the
	// shape of the C ABI for its kernel counterpart -- sizeof(virtio_net_hdr).
	virtioNetHdrLen = int(unsafe.Sizeof(virtioNetHdr{}))
)

// flowKey represents the key for a flow. Addresses are stored in 16-byte
// arrays so the same key type serves IPv4 (first 4 bytes used) and IPv6.
type flowKey struct {
	srcAddr, dstAddr [16]byte
	srcPort, dstPort uint16
	rxAck            uint32 // varying ack values should not be coalesced. Treat them as separate flows.
}

// tcpGROTable holds flow and coalescing information for the purposes of GRO.
69 type tcpGROTable struct { 70 itemsByFlow map[flowKey][]tcpGROItem 71 itemsPool [][]tcpGROItem 72 } 73 74 func newTCPGROTable() *tcpGROTable { 75 t := &tcpGROTable{ 76 itemsByFlow: make(map[flowKey][]tcpGROItem, conn.IdealBatchSize), 77 itemsPool: make([][]tcpGROItem, conn.IdealBatchSize), 78 } 79 for i := range t.itemsPool { 80 t.itemsPool[i] = make([]tcpGROItem, 0, conn.IdealBatchSize) 81 } 82 return t 83 } 84 85 func newFlowKey(pkt []byte, srcAddr, dstAddr, tcphOffset int) flowKey { 86 key := flowKey{} 87 addrSize := dstAddr - srcAddr 88 copy(key.srcAddr[:], pkt[srcAddr:dstAddr]) 89 copy(key.dstAddr[:], pkt[dstAddr:dstAddr+addrSize]) 90 key.srcPort = binary.BigEndian.Uint16(pkt[tcphOffset:]) 91 key.dstPort = binary.BigEndian.Uint16(pkt[tcphOffset+2:]) 92 key.rxAck = binary.BigEndian.Uint32(pkt[tcphOffset+8:]) 93 return key 94 } 95 96 // lookupOrInsert looks up a flow for the provided packet and metadata, 97 // returning the packets found for the flow, or inserting a new one if none 98 // is found. 99 func (t *tcpGROTable) lookupOrInsert(pkt []byte, srcAddrOffset, dstAddrOffset, tcphOffset, tcphLen, bufsIndex int) ([]tcpGROItem, bool) { 100 key := newFlowKey(pkt, srcAddrOffset, dstAddrOffset, tcphOffset) 101 items, ok := t.itemsByFlow[key] 102 if ok { 103 return items, ok 104 } 105 // TODO: insert() performs another map lookup. This could be rearranged to avoid. 106 t.insert(pkt, srcAddrOffset, dstAddrOffset, tcphOffset, tcphLen, bufsIndex) 107 return nil, false 108 } 109 110 // insert an item in the table for the provided packet and packet metadata. 
111 func (t *tcpGROTable) insert(pkt []byte, srcAddrOffset, dstAddrOffset, tcphOffset, tcphLen, bufsIndex int) { 112 key := newFlowKey(pkt, srcAddrOffset, dstAddrOffset, tcphOffset) 113 item := tcpGROItem{ 114 key: key, 115 bufsIndex: uint16(bufsIndex), 116 gsoSize: uint16(len(pkt[tcphOffset+tcphLen:])), 117 iphLen: uint8(tcphOffset), 118 tcphLen: uint8(tcphLen), 119 sentSeq: binary.BigEndian.Uint32(pkt[tcphOffset+4:]), 120 pshSet: pkt[tcphOffset+tcpFlagsOffset]&tcpFlagPSH != 0, 121 } 122 items, ok := t.itemsByFlow[key] 123 if !ok { 124 items = t.newItems() 125 } 126 items = append(items, item) 127 t.itemsByFlow[key] = items 128 } 129 130 func (t *tcpGROTable) updateAt(item tcpGROItem, i int) { 131 items, _ := t.itemsByFlow[item.key] 132 items[i] = item 133 } 134 135 func (t *tcpGROTable) deleteAt(key flowKey, i int) { 136 items, _ := t.itemsByFlow[key] 137 items = append(items[:i], items[i+1:]...) 138 t.itemsByFlow[key] = items 139 } 140 141 // tcpGROItem represents bookkeeping data for a TCP packet during the lifetime 142 // of a GRO evaluation across a vector of packets. 143 type tcpGROItem struct { 144 key flowKey 145 sentSeq uint32 // the sequence number 146 bufsIndex uint16 // the index into the original bufs slice 147 numMerged uint16 // the number of packets merged into this item 148 gsoSize uint16 // payload size 149 iphLen uint8 // ip header len 150 tcphLen uint8 // tcp header len 151 pshSet bool // psh flag is set 152 } 153 154 func (t *tcpGROTable) newItems() []tcpGROItem { 155 var items []tcpGROItem 156 items, t.itemsPool = t.itemsPool[len(t.itemsPool)-1], t.itemsPool[:len(t.itemsPool)-1] 157 return items 158 } 159 160 func (t *tcpGROTable) reset() { 161 for k, items := range t.itemsByFlow { 162 items = items[:0] 163 t.itemsPool = append(t.itemsPool, items) 164 delete(t.itemsByFlow, k) 165 } 166 } 167 168 // canCoalesce represents the outcome of checking if two TCP packets are 169 // candidates for coalescing. 
type canCoalesce int

const (
	coalescePrepend     canCoalesce = -1
	coalesceUnavailable canCoalesce = 0
	coalesceAppend      canCoalesce = 1
)

// tcpPacketsCanCoalesce evaluates if pkt can be coalesced with the packet
// described by item. This function makes considerations that match the kernel's
// GRO self tests, which can be found in tools/testing/selftests/net/gro.c.
func tcpPacketsCanCoalesce(pkt []byte, iphLen, tcphLen uint8, seq uint32, pshSet bool, gsoSize uint16, item tcpGROItem, bufs [][]byte, bufsOffset int) canCoalesce {
	pktTarget := bufs[item.bufsIndex][bufsOffset:]
	if tcphLen != item.tcphLen {
		// cannot coalesce with unequal tcp options len
		return coalesceUnavailable
	}
	if tcphLen > 20 {
		// NOTE(review): the target's option range ends at iphLen+tcphLen (the
		// candidate's lengths) rather than item.iphLen+item.tcphLen; in
		// practice both headers have equal lengths here since flows mix only
		// same-version packets and tcphLen was checked above — confirm.
		if !bytes.Equal(pkt[iphLen+20:iphLen+tcphLen], pktTarget[item.iphLen+20:iphLen+tcphLen]) {
			// cannot coalesce with unequal tcp options
			return coalesceUnavailable
		}
	}
	if pkt[0]>>4 == 6 { // IPv6
		if pkt[0] != pktTarget[0] || pkt[1]>>4 != pktTarget[1]>>4 {
			// cannot coalesce with unequal Traffic class values
			return coalesceUnavailable
		}
		if pkt[7] != pktTarget[7] {
			// cannot coalesce with unequal Hop limit values
			return coalesceUnavailable
		}
	} else { // IPv4
		if pkt[1] != pktTarget[1] {
			// cannot coalesce with unequal ToS values
			return coalesceUnavailable
		}
		if pkt[6]>>5 != pktTarget[6]>>5 {
			// cannot coalesce with unequal DF or reserved bits. MF is checked
			// further up the stack.
			return coalesceUnavailable
		}
		if pkt[8] != pktTarget[8] {
			// cannot coalesce with unequal TTL values
			return coalesceUnavailable
		}
	}
	// seq adjacency: total payload held by item is gsoSize * (numMerged+1).
	lhsLen := item.gsoSize
	lhsLen += item.numMerged * item.gsoSize
	if seq == item.sentSeq+uint32(lhsLen) { // pkt aligns following item from a seq num perspective
		if item.pshSet {
			// We cannot append to a segment that has the PSH flag set, PSH
			// can only be set on the final segment in a reassembled group.
			return coalesceUnavailable
		}
		if len(pktTarget[iphLen+tcphLen:])%int(item.gsoSize) != 0 {
			// A smaller than gsoSize packet has been appended previously.
			// Nothing can come after a smaller packet on the end.
			return coalesceUnavailable
		}
		if gsoSize > item.gsoSize {
			// We cannot have a larger packet following a smaller one.
			return coalesceUnavailable
		}
		return coalesceAppend
	} else if seq+uint32(gsoSize) == item.sentSeq { // pkt aligns in front of item from a seq num perspective
		if pshSet {
			// We cannot prepend with a segment that has the PSH flag set, PSH
			// can only be set on the final segment in a reassembled group.
			return coalesceUnavailable
		}
		if gsoSize < item.gsoSize {
			// We cannot have a larger packet following a smaller one.
			return coalesceUnavailable
		}
		if gsoSize > item.gsoSize && item.numMerged > 0 {
			// There's at least one previous merge, and we're larger than all
			// previous. This would put multiple smaller packets on the end.
			return coalesceUnavailable
		}
		return coalescePrepend
	}
	return coalesceUnavailable
}

// tcpChecksumValid reports whether the TCP checksum of pkt (an IP packet with
// header length iphLen) verifies, i.e. the folded sum over the pseudo-header
// plus TCP segment is zero.
func tcpChecksumValid(pkt []byte, iphLen uint8, isV6 bool) bool {
	srcAddrAt := ipv4SrcAddrOffset
	addrSize := 4
	if isV6 {
		srcAddrAt = ipv6SrcAddrOffset
		addrSize = 16
	}
	tcpTotalLen := uint16(len(pkt) - int(iphLen))
	tcpCSumNoFold := pseudoHeaderChecksumNoFold(unix.IPPROTO_TCP, pkt[srcAddrAt:srcAddrAt+addrSize], pkt[srcAddrAt+addrSize:srcAddrAt+addrSize*2], tcpTotalLen)
	return ^checksum(pkt[iphLen:], tcpCSumNoFold) == 0
}

// coalesceResult represents the result of attempting to coalesce two TCP
// packets.
type coalesceResult int

const (
	coalesceInsufficientCap coalesceResult = 0
	coalescePSHEnding       coalesceResult = 1
	coalesceItemInvalidCSum coalesceResult = 2
	coalescePktInvalidCSum  coalesceResult = 3
	coalesceSuccess         coalesceResult = 4
)

// coalesceTCPPackets attempts to coalesce pkt with the packet described by
// item, returning the outcome. This function may swap bufs elements in the
// event of a prepend as item's bufs index is already being tracked for writing
// to a Device.
func coalesceTCPPackets(mode canCoalesce, pkt []byte, pktBuffsIndex int, gsoSize uint16, seq uint32, pshSet bool, item *tcpGROItem, bufs [][]byte, bufsOffset int, isV6 bool) coalesceResult {
	var pktHead []byte // the packet that will end up at the front
	headersLen := item.iphLen + item.tcphLen
	coalescedLen := len(bufs[item.bufsIndex][bufsOffset:]) + len(pkt) - int(headersLen)

	// Copy data
	if mode == coalescePrepend {
		pktHead = pkt
		if cap(pkt)-bufsOffset < coalescedLen {
			// We don't want to allocate a new underlying array if capacity is
			// too small.
			return coalesceInsufficientCap
		}
		if pshSet {
			return coalescePSHEnding
		}
		if item.numMerged == 0 {
			// Validate the existing packet's checksum once, before its first merge.
			if !tcpChecksumValid(bufs[item.bufsIndex][bufsOffset:], item.iphLen, isV6) {
				return coalesceItemInvalidCSum
			}
		}
		if !tcpChecksumValid(pkt, item.iphLen, isV6) {
			return coalescePktInvalidCSum
		}
		item.sentSeq = seq // pkt now leads the group, so its seq becomes the group's seq
		extendBy := coalescedLen - len(pktHead)
		bufs[pktBuffsIndex] = append(bufs[pktBuffsIndex], make([]byte, extendBy)...)
		// Copy the existing item's payload (headers stripped) after pkt.
		copy(bufs[pktBuffsIndex][bufsOffset+len(pkt):], bufs[item.bufsIndex][bufsOffset+int(headersLen):])
		// Flip the slice headers in bufs as part of prepend. The index of item
		// is already being tracked for writing.
		bufs[item.bufsIndex], bufs[pktBuffsIndex] = bufs[pktBuffsIndex], bufs[item.bufsIndex]
	} else {
		pktHead = bufs[item.bufsIndex][bufsOffset:]
		if cap(pktHead)-bufsOffset < coalescedLen {
			// We don't want to allocate a new underlying array if capacity is
			// too small.
			return coalesceInsufficientCap
		}
		if item.numMerged == 0 {
			if !tcpChecksumValid(bufs[item.bufsIndex][bufsOffset:], item.iphLen, isV6) {
				return coalesceItemInvalidCSum
			}
		}
		if !tcpChecksumValid(pkt, item.iphLen, isV6) {
			return coalescePktInvalidCSum
		}
		if pshSet {
			// We are appending a segment with PSH set.
			item.pshSet = pshSet
			pktHead[item.iphLen+tcpFlagsOffset] |= tcpFlagPSH
		}
		extendBy := len(pkt) - int(headersLen)
		bufs[item.bufsIndex] = append(bufs[item.bufsIndex], make([]byte, extendBy)...)
		// Append pkt's payload (headers stripped) after the existing data.
		copy(bufs[item.bufsIndex][bufsOffset+len(pktHead):], pkt[headersLen:])
	}

	if gsoSize > item.gsoSize {
		item.gsoSize = gsoSize
	}
	hdr := virtioNetHdr{
		flags:      unix.VIRTIO_NET_HDR_F_NEEDS_CSUM, // this turns into CHECKSUM_PARTIAL in the skb
		hdrLen:     uint16(headersLen),
		gsoSize:    uint16(item.gsoSize),
		csumStart:  uint16(item.iphLen),
		csumOffset: 16, // TCP checksum field offset within the TCP header
	}

	// Recalculate the total len (IPv4) or payload len (IPv6). Recalculate the
	// (IPv4) header checksum.
	if isV6 {
		hdr.gsoType = unix.VIRTIO_NET_HDR_GSO_TCPV6
		binary.BigEndian.PutUint16(pktHead[4:], uint16(coalescedLen)-uint16(item.iphLen)) // set new payload len
	} else {
		hdr.gsoType = unix.VIRTIO_NET_HDR_GSO_TCPV4
		pktHead[10], pktHead[11] = 0, 0                               // clear checksum field
		binary.BigEndian.PutUint16(pktHead[2:], uint16(coalescedLen)) // set new total length
		iphCSum := ^checksum(pktHead[:item.iphLen], 0)                // compute checksum
		binary.BigEndian.PutUint16(pktHead[10:], iphCSum)             // set checksum field
	}
	hdr.encode(bufs[item.bufsIndex][bufsOffset-virtioNetHdrLen:])

	// Calculate the pseudo header checksum and place it at the TCP checksum
	// offset. Downstream checksum offloading will combine this with computation
	// of the tcp header and payload checksum.
	addrLen := 4
	addrOffset := ipv4SrcAddrOffset
	if isV6 {
		addrLen = 16
		addrOffset = ipv6SrcAddrOffset
	}
	srcAddrAt := bufsOffset + addrOffset
	srcAddr := bufs[item.bufsIndex][srcAddrAt : srcAddrAt+addrLen]
	dstAddr := bufs[item.bufsIndex][srcAddrAt+addrLen : srcAddrAt+addrLen*2]
	psum := pseudoHeaderChecksumNoFold(unix.IPPROTO_TCP, srcAddr, dstAddr, uint16(coalescedLen-int(item.iphLen)))
	binary.BigEndian.PutUint16(pktHead[hdr.csumStart+hdr.csumOffset:], checksum([]byte{}, psum))

	item.numMerged++
	return coalesceSuccess
}

const (
	// ipv4FlagMoreFragments is the MF bit within byte 6 of an IPv4 header.
	ipv4FlagMoreFragments uint8 = 0x20
)

const (
	ipv4SrcAddrOffset = 12 // offset of the source address in an IPv4 header
	ipv6SrcAddrOffset = 8  // offset of the source address in an IPv6 header
	maxUint16         = 1<<16 - 1
)

// tcpGRO evaluates the TCP packet at pktI in bufs for coalescing with
// existing packets tracked in table. It will return false when pktI is not
// coalesced, otherwise true. This indicates to the caller if bufs[pktI]
// should be written to the Device.
func tcpGRO(bufs [][]byte, offset int, pktI int, table *tcpGROTable, isV6 bool) (pktCoalesced bool) {
	pkt := bufs[pktI][offset:]
	if len(pkt) > maxUint16 {
		// A valid IPv4 or IPv6 packet will never exceed this.
		return false
	}
	iphLen := int((pkt[0] & 0x0F) * 4) // IPv4 IHL in 32-bit words
	if isV6 {
		iphLen = 40
		ipv6HPayloadLen := int(binary.BigEndian.Uint16(pkt[4:]))
		if ipv6HPayloadLen != len(pkt)-iphLen {
			return false
		}
	} else {
		totalLen := int(binary.BigEndian.Uint16(pkt[2:]))
		if totalLen != len(pkt) {
			return false
		}
	}
	if len(pkt) < iphLen {
		return false
	}
	tcphLen := int((pkt[iphLen+12] >> 4) * 4) // TCP data offset in 32-bit words
	if tcphLen < 20 || tcphLen > 60 {
		return false
	}
	if len(pkt) < iphLen+tcphLen {
		return false
	}
	if !isV6 {
		if pkt[6]&ipv4FlagMoreFragments != 0 || pkt[6]<<3 != 0 || pkt[7] != 0 {
			// no GRO support for fragmented segments for now
			return false
		}
	}
	tcpFlags := pkt[iphLen+tcpFlagsOffset]
	var pshSet bool
	// not a candidate if any non-ACK flags (except PSH+ACK) are set
	if tcpFlags != tcpFlagACK {
		if pkt[iphLen+tcpFlagsOffset] != tcpFlagACK|tcpFlagPSH {
			return false
		}
		pshSet = true
	}
	gsoSize := uint16(len(pkt) - tcphLen - iphLen)
	// not a candidate if payload len is 0
	if gsoSize < 1 {
		return false
	}
	seq := binary.BigEndian.Uint32(pkt[iphLen+4:])
	srcAddrOffset := ipv4SrcAddrOffset
	addrLen := 4
	if isV6 {
		srcAddrOffset = ipv6SrcAddrOffset
		addrLen = 16
	}
	items, existing := table.lookupOrInsert(pkt, srcAddrOffset, srcAddrOffset+addrLen, iphLen, tcphLen, pktI)
	if !existing {
		return false
	}
	for i := len(items) - 1; i >= 0; i-- {
		// In the best case of packets arriving in order iterating in reverse is
		// more efficient if there are multiple items for a given flow. This
		// also enables a natural table.deleteAt() in the
		// coalesceItemInvalidCSum case without the need for index tracking.
		// This algorithm makes a best effort to coalesce in the event of
		// unordered packets, where pkt may land anywhere in items from a
		// sequence number perspective, however once an item is inserted into
		// the table it is never compared across other items later.
		item := items[i]
		can := tcpPacketsCanCoalesce(pkt, uint8(iphLen), uint8(tcphLen), seq, pshSet, gsoSize, item, bufs, offset)
		if can != coalesceUnavailable {
			result := coalesceTCPPackets(can, pkt, pktI, gsoSize, seq, pshSet, &item, bufs, offset, isV6)
			switch result {
			case coalesceSuccess:
				table.updateAt(item, i)
				return true
			case coalesceItemInvalidCSum:
				// delete the item with an invalid csum
				table.deleteAt(item.key, i)
			case coalescePktInvalidCSum:
				// no point in inserting an item that we can't coalesce
				return false
			default:
			}
		}
	}
	// failed to coalesce with any other packets; store the item in the flow
	table.insert(pkt, srcAddrOffset, srcAddrOffset+addrLen, iphLen, tcphLen, pktI)
	return false
}

// isTCP4NoIPOptions reports whether b starts with an IPv4 packet whose header
// is exactly 20 bytes (IHL == 5, i.e. no IP options) carrying TCP, with at
// least 40 bytes present (minimal IPv4 + minimal TCP headers).
func isTCP4NoIPOptions(b []byte) bool {
	if len(b) < 40 {
		return false
	}
	if b[0]>>4 != 4 {
		return false
	}
	if b[0]&0x0F != 5 {
		return false
	}
	if b[9] != unix.IPPROTO_TCP {
		return false
	}
	return true
}

// isTCP6NoEH reports whether b starts with an IPv6 packet whose Next Header
// is TCP directly (no extension headers), with at least 60 bytes present
// (IPv6 header + minimal TCP header).
func isTCP6NoEH(b []byte) bool {
	if len(b) < 60 {
		return false
	}
	if b[0]>>4 != 6 {
		return false
	}
	if b[6] != unix.IPPROTO_TCP {
		return false
	}
	return true
}

// handleGRO evaluates bufs for GRO, and writes the indices of the resulting
// packets into toWrite. toWrite, tcp4Table, and tcp6Table should initially be
// empty (but non-nil), and are passed in to save allocs as the caller may reset
// and recycle them across vectors of packets.
func handleGRO(bufs [][]byte, offset int, tcp4Table, tcp6Table *tcpGROTable, toWrite *[]int) error {
	for i := range bufs {
		// offset must leave room for a virtioNetHdr in front of the packet
		// and still point inside the buffer.
		if offset < virtioNetHdrLen || offset > len(bufs[i])-1 {
			return errors.New("invalid offset")
		}
		var coalesced bool
		switch {
		case isTCP4NoIPOptions(bufs[i][offset:]): // ipv4 packets w/IP options do not coalesce
			coalesced = tcpGRO(bufs, offset, i, tcp4Table, false)
		case isTCP6NoEH(bufs[i][offset:]): // ipv6 packets w/extension headers do not coalesce
			coalesced = tcpGRO(bufs, offset, i, tcp6Table, true)
		}
		if !coalesced {
			// Not merged into another buffer: prepend a zeroed virtio header
			// and mark this index for writing as-is.
			hdr := virtioNetHdr{}
			err := hdr.encode(bufs[i][offset-virtioNetHdrLen:])
			if err != nil {
				return err
			}
			*toWrite = append(*toWrite, i)
		}
	}
	return nil
}

// tcpTSO splits packets from in into outBuffs, writing the size of each
// element into sizes. It returns the number of buffers populated, and/or an
// error.
func tcpTSO(in []byte, hdr virtioNetHdr, outBuffs [][]byte, sizes []int, outOffset int) (int, error) {
	iphLen := int(hdr.csumStart) // csumStart is the IP header length for TCP GSO
	srcAddrOffset := ipv6SrcAddrOffset
	addrLen := 16
	if hdr.gsoType == unix.VIRTIO_NET_HDR_GSO_TCPV4 {
		in[10], in[11] = 0, 0 // clear ipv4 header checksum
		srcAddrOffset = ipv4SrcAddrOffset
		addrLen = 4
	}
	tcpCSumAt := int(hdr.csumStart + hdr.csumOffset)
	in[tcpCSumAt], in[tcpCSumAt+1] = 0, 0 // clear tcp checksum
	firstTCPSeqNum := binary.BigEndian.Uint32(in[hdr.csumStart+4:])
	nextSegmentDataAt := int(hdr.hdrLen)
	i := 0
	for ; nextSegmentDataAt < len(in); i++ {
		if i == len(outBuffs) {
			return i - 1, ErrTooManySegments
		}
		// Each segment carries up to gsoSize payload bytes; the last may be short.
		nextSegmentEnd := nextSegmentDataAt + int(hdr.gsoSize)
		if nextSegmentEnd > len(in) {
			nextSegmentEnd = len(in)
		}
		segmentDataLen := nextSegmentEnd - nextSegmentDataAt
		totalLen := int(hdr.hdrLen) + segmentDataLen
		sizes[i] = totalLen
		out := outBuffs[i][outOffset:]

		// IP header (copied from in, then patched per-segment below)
		copy(out, in[:iphLen])
		if hdr.gsoType == unix.VIRTIO_NET_HDR_GSO_TCPV4 {
			// For IPv4 we are responsible for incrementing the ID field,
			// updating the total len field, and recalculating the header
			// checksum.
			if i > 0 {
				id := binary.BigEndian.Uint16(out[4:])
				id += uint16(i)
				binary.BigEndian.PutUint16(out[4:], id)
			}
			binary.BigEndian.PutUint16(out[2:], uint16(totalLen))
			ipv4CSum := ^checksum(out[:iphLen], 0)
			binary.BigEndian.PutUint16(out[10:], ipv4CSum)
		} else {
			// For IPv6 we are responsible for updating the payload length field.
			binary.BigEndian.PutUint16(out[4:], uint16(totalLen-iphLen))
		}

		// TCP header
		copy(out[hdr.csumStart:hdr.hdrLen], in[hdr.csumStart:hdr.hdrLen])
		tcpSeq := firstTCPSeqNum + uint32(hdr.gsoSize*uint16(i))
		binary.BigEndian.PutUint32(out[hdr.csumStart+4:], tcpSeq)
		if nextSegmentEnd != len(in) {
			// FIN and PSH should only be set on last segment
			clearFlags := tcpFlagFIN | tcpFlagPSH
			out[hdr.csumStart+tcpFlagsOffset] &^= clearFlags
		}

		// payload
		copy(out[hdr.hdrLen:], in[nextSegmentDataAt:nextSegmentEnd])

		// TCP checksum over the pseudo-header plus this segment's TCP
		// header and payload.
		tcpHLen := int(hdr.hdrLen - hdr.csumStart)
		tcpLenForPseudo := uint16(tcpHLen + segmentDataLen)
		tcpCSumNoFold := pseudoHeaderChecksumNoFold(unix.IPPROTO_TCP, in[srcAddrOffset:srcAddrOffset+addrLen], in[srcAddrOffset+addrLen:srcAddrOffset+addrLen*2], tcpLenForPseudo)
		tcpCSum := ^checksum(out[hdr.csumStart:totalLen], tcpCSumNoFold)
		binary.BigEndian.PutUint16(out[hdr.csumStart+hdr.csumOffset:], tcpCSum)

		nextSegmentDataAt += int(hdr.gsoSize)
	}
	return i, nil
}

// gsoNoneChecksum computes the checksum for a VIRTIO_NET_HDR_GSO_NONE packet:
// it folds the value already stored at the checksum offset (typically the
// pseudo-header checksum) into the checksum of the data from cSumStart onward
// and writes the result back in place. Always returns nil.
func gsoNoneChecksum(in []byte, cSumStart, cSumOffset uint16) error {
	cSumAt := cSumStart + cSumOffset
	// The initial value at the checksum offset should be summed with the
	// checksum we compute. This is typically the pseudo-header checksum.
	initial := binary.BigEndian.Uint16(in[cSumAt:])
	in[cSumAt], in[cSumAt+1] = 0, 0
	binary.BigEndian.PutUint16(in[cSumAt:], ^checksum(in[cSumStart:], uint64(initial)))
	return nil
}