inet.af/netstack@v0.0.0-20220214151720-7585b01ddccf/tcpip/stack/packet_buffer.go (about) 1 // Copyright 2019 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at // 6 // http://www.apache.org/licenses/LICENSE-2.0 7 // 8 // Unless required by applicable law or agreed to in writing, software 9 // distributed under the License is distributed on an "AS IS" BASIS, 10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package stack 15 16 import ( 17 "fmt" 18 19 "inet.af/netstack/buffer" 20 "inet.af/netstack/sync" 21 "inet.af/netstack/tcpip" 22 tcpipbuffer "inet.af/netstack/tcpip/buffer" 23 "inet.af/netstack/tcpip/header" 24 ) 25 26 type headerType int 27 28 const ( 29 linkHeader headerType = iota 30 networkHeader 31 transportHeader 32 numHeaderType 33 ) 34 35 var pkPool = sync.Pool{ 36 New: func() interface{} { 37 return &PacketBuffer{} 38 }, 39 } 40 41 // PacketBufferOptions specifies options for PacketBuffer creation. 42 type PacketBufferOptions struct { 43 // ReserveHeaderBytes is the number of bytes to reserve for headers. Total 44 // number of bytes pushed onto the headers must not exceed this value. 45 ReserveHeaderBytes int 46 47 // Data is the initial unparsed data for the new packet. If set, it will be 48 // owned by the new packet. 49 Data tcpipbuffer.VectorisedView 50 51 // IsForwardedPacket identifies that the PacketBuffer being created is for a 52 // forwarded packet. 53 IsForwardedPacket bool 54 } 55 56 // A PacketBuffer contains all the data of a network packet. 57 // 58 // As a PacketBuffer traverses up the stack, it may be necessary to pass it to 59 // multiple endpoints. 60 // 61 // The whole packet is expected to be a series of bytes in the following order: 62 // LinkHeader, NetworkHeader, TransportHeader, and Data. Any of them can be 63 // empty. Use of PacketBuffer in any other order is unsupported. 64 // 65 // PacketBuffer must be created with NewPacketBuffer. 66 // 67 // Internal structure: A PacketBuffer holds a pointer to buffer.Buffer, which 68 // exposes a logically-contiguous byte storage. The underlying storage structure 69 // is abstracted out, and should not be a concern here for most of the time. 70 // 71 // |- reserved ->| 72 // |--->| consumed (incoming) 73 // 0 V V 74 // +--------+----+----+--------------------+ 75 // | | | | current data ... | (buf) 76 // +--------+----+----+--------------------+ 77 // ^ | 78 // |<---| pushed (outgoing) 79 // 80 // When a PacketBuffer is created, a `reserved` header region can be specified, 81 // which stack pushes headers in this region for an outgoing packet. There could 82 // be no such region for an incoming packet, and `reserved` is 0. The value of 83 // `reserved` never changes in the entire lifetime of the packet. 84 // 85 // Outgoing Packet: When a header is pushed, `pushed` gets incremented by the 86 // pushed length, and the current value is stored for each header. PacketBuffer 87 // substracts this value from `reserved` to compute the starting offset of each 88 // header in `buf`. 89 // 90 // Incoming Packet: When a header is consumed (a.k.a. parsed), the current 91 // `consumed` value is stored for each header, and it gets incremented by the 92 // consumed length. PacketBuffer adds this value to `reserved` to compute the 93 // starting offset of each header in `buf`. 94 type PacketBuffer struct { 95 _ sync.NoCopy 96 97 packetBufferRefs 98 99 // PacketBufferEntry is used to build an intrusive list of 100 // PacketBuffers. 101 PacketBufferEntry 102 103 // buf is the underlying buffer for the packet. See struct level docs for 104 // details. 105 buf *buffer.Buffer 106 reserved int 107 pushed int 108 consumed int 109 110 // headers stores metadata about each header. 111 headers [numHeaderType]headerInfo 112 113 // NetworkProtocolNumber is only valid when NetworkHeader().View().IsEmpty() 114 // returns false. 115 // TODO(gvisor.dev/issue/3574): Remove the separately passed protocol 116 // numbers in registration APIs that take a PacketBuffer. 117 NetworkProtocolNumber tcpip.NetworkProtocolNumber 118 119 // TransportProtocol is only valid if it is non zero. 120 // TODO(gvisor.dev/issue/3810): This and the network protocol number should 121 // be moved into the headerinfo. This should resolve the validity issue. 122 TransportProtocolNumber tcpip.TransportProtocolNumber 123 124 // Hash is the transport layer hash of this packet. A value of zero 125 // indicates no valid hash has been set. 126 Hash uint32 127 128 // Owner is implemented by task to get the uid and gid. 129 // Only set for locally generated packets. 130 Owner tcpip.PacketOwner 131 132 // The following fields are only set by the qdisc layer when the packet 133 // is added to a queue. 134 EgressRoute RouteInfo 135 GSOOptions GSO 136 137 // SNATDone indicates if the packet's source has been manipulated as per 138 // iptables NAT table. 139 SNATDone bool 140 141 // DNATDone indicates if the packet's destination has been manipulated as per 142 // iptables NAT table. 143 DNATDone bool 144 145 // PktType indicates the SockAddrLink.PacketType of the packet as defined in 146 // https://www.man7.org/linux/man-pages/man7/packet.7.html. 147 PktType tcpip.PacketType 148 149 // NICID is the ID of the last interface the network packet was handled at. 150 NICID tcpip.NICID 151 152 // RXTransportChecksumValidated indicates that transport checksum verification 153 // may be safely skipped. 154 RXTransportChecksumValidated bool 155 156 // NetworkPacketInfo holds an incoming packet's network-layer information. 157 NetworkPacketInfo NetworkPacketInfo 158 159 tuple *tuple 160 161 preserveObject bool 162 } 163 164 // NewPacketBuffer creates a new PacketBuffer with opts. 165 func NewPacketBuffer(opts PacketBufferOptions) *PacketBuffer { 166 pk := pkPool.Get().(*PacketBuffer) 167 pk.reset() 168 pk.buf = &buffer.Buffer{} 169 if opts.ReserveHeaderBytes != 0 { 170 pk.buf.AppendOwned(make([]byte, opts.ReserveHeaderBytes)) 171 pk.reserved = opts.ReserveHeaderBytes 172 } 173 for _, v := range opts.Data.Views() { 174 pk.buf.AppendOwned(v) 175 } 176 if opts.IsForwardedPacket { 177 pk.NetworkPacketInfo.IsForwardedPacket = opts.IsForwardedPacket 178 } 179 pk.InitRefs() 180 return pk 181 } 182 183 // PreserveObject marks this PacketBuffer so it is not recycled by internal 184 // pooling. 185 func (pk *PacketBuffer) PreserveObject() { 186 pk.preserveObject = true 187 } 188 189 // DecRef decrements the PacketBuffer's refcount. If the refcount is 190 // decremented to zero, the PacketBuffer is returned to the PacketBuffer 191 // pool. 192 func (pk *PacketBuffer) DecRef() { 193 pk.packetBufferRefs.DecRef(func() { 194 if pk.packetBufferRefs.refCount == 0 && !pk.preserveObject { 195 pkPool.Put(pk) 196 } 197 }) 198 } 199 200 func (pk *PacketBuffer) reset() { 201 *pk = PacketBuffer{} 202 } 203 204 // ReservedHeaderBytes returns the number of bytes initially reserved for 205 // headers. 206 func (pk *PacketBuffer) ReservedHeaderBytes() int { 207 return pk.reserved 208 } 209 210 // AvailableHeaderBytes returns the number of bytes currently available for 211 // headers. This is relevant to PacketHeader.Push method only. 212 func (pk *PacketBuffer) AvailableHeaderBytes() int { 213 return pk.reserved - pk.pushed 214 } 215 216 // LinkHeader returns the handle to link-layer header. 217 func (pk *PacketBuffer) LinkHeader() PacketHeader { 218 return PacketHeader{ 219 pk: pk, 220 typ: linkHeader, 221 } 222 } 223 224 // NetworkHeader returns the handle to network-layer header. 225 func (pk *PacketBuffer) NetworkHeader() PacketHeader { 226 return PacketHeader{ 227 pk: pk, 228 typ: networkHeader, 229 } 230 } 231 232 // TransportHeader returns the handle to transport-layer header. 233 func (pk *PacketBuffer) TransportHeader() PacketHeader { 234 return PacketHeader{ 235 pk: pk, 236 typ: transportHeader, 237 } 238 } 239 240 // HeaderSize returns the total size of all headers in bytes. 241 func (pk *PacketBuffer) HeaderSize() int { 242 return pk.pushed + pk.consumed 243 } 244 245 // Size returns the size of packet in bytes. 246 func (pk *PacketBuffer) Size() int { 247 return int(pk.buf.Size()) - pk.headerOffset() 248 } 249 250 // MemSize returns the estimation size of the pk in memory, including backing 251 // buffer data. 252 func (pk *PacketBuffer) MemSize() int { 253 return int(pk.buf.Size()) + packetBufferStructSize 254 } 255 256 // Data returns the handle to data portion of pk. 257 func (pk *PacketBuffer) Data() PacketData { 258 return PacketData{pk: pk} 259 } 260 261 // Views returns the underlying storage of the whole packet. 262 func (pk *PacketBuffer) Views() []tcpipbuffer.View { 263 var views []tcpipbuffer.View 264 offset := pk.headerOffset() 265 pk.buf.SubApply(offset, int(pk.buf.Size())-offset, func(v []byte) { 266 views = append(views, v) 267 }) 268 return views 269 } 270 271 func (pk *PacketBuffer) headerOffset() int { 272 return pk.reserved - pk.pushed 273 } 274 275 func (pk *PacketBuffer) headerOffsetOf(typ headerType) int { 276 return pk.reserved + pk.headers[typ].offset 277 } 278 279 func (pk *PacketBuffer) dataOffset() int { 280 return pk.reserved + pk.consumed 281 } 282 283 func (pk *PacketBuffer) push(typ headerType, size int) tcpipbuffer.View { 284 h := &pk.headers[typ] 285 if h.length > 0 { 286 panic(fmt.Sprintf("push(%s, %d) called after previous push", typ, size)) 287 } 288 if pk.pushed+size > pk.reserved { 289 panic(fmt.Sprintf("push(%s, %d) overflows; pushed=%d reserved=%d", typ, size, pk.pushed, pk.reserved)) 290 } 291 pk.pushed += size 292 h.offset = -pk.pushed 293 h.length = size 294 return pk.headerView(typ) 295 } 296 297 func (pk *PacketBuffer) consume(typ headerType, size int) (v tcpipbuffer.View, consumed bool) { 298 h := &pk.headers[typ] 299 if h.length > 0 { 300 panic(fmt.Sprintf("consume must not be called twice: type %s", typ)) 301 } 302 if pk.reserved+pk.consumed+size > int(pk.buf.Size()) { 303 return nil, false 304 } 305 h.offset = pk.consumed 306 h.length = size 307 pk.consumed += size 308 return pk.headerView(typ), true 309 } 310 311 func (pk *PacketBuffer) headerView(typ headerType) tcpipbuffer.View { 312 h := &pk.headers[typ] 313 if h.length == 0 { 314 return nil 315 } 316 v, ok := pk.buf.PullUp(pk.headerOffsetOf(typ), h.length) 317 if !ok { 318 panic("PullUp failed") 319 } 320 return v 321 } 322 323 // Clone makes a semi-deep copy of pk. The underlying packet payload is 324 // shared. Hence, no modifications is done to underlying packet payload. 325 func (pk *PacketBuffer) Clone() *PacketBuffer { 326 newPk := pkPool.Get().(*PacketBuffer) 327 newPk.PacketBufferEntry = pk.PacketBufferEntry 328 newPk.buf = pk.buf.Clone() 329 newPk.reserved = pk.reserved 330 newPk.pushed = pk.pushed 331 newPk.consumed = pk.consumed 332 newPk.headers = pk.headers 333 newPk.Hash = pk.Hash 334 newPk.Owner = pk.Owner 335 newPk.GSOOptions = pk.GSOOptions 336 newPk.NetworkProtocolNumber = pk.NetworkProtocolNumber 337 newPk.DNATDone = pk.DNATDone 338 newPk.SNATDone = pk.SNATDone 339 newPk.TransportProtocolNumber = pk.TransportProtocolNumber 340 newPk.PktType = pk.PktType 341 newPk.NICID = pk.NICID 342 newPk.RXTransportChecksumValidated = pk.RXTransportChecksumValidated 343 newPk.NetworkPacketInfo = pk.NetworkPacketInfo 344 newPk.tuple = pk.tuple 345 newPk.InitRefs() 346 return newPk 347 } 348 349 // Network returns the network header as a header.Network. 350 // 351 // Network should only be called when NetworkHeader has been set. 352 func (pk *PacketBuffer) Network() header.Network { 353 switch netProto := pk.NetworkProtocolNumber; netProto { 354 case header.IPv4ProtocolNumber: 355 return header.IPv4(pk.NetworkHeader().View()) 356 case header.IPv6ProtocolNumber: 357 return header.IPv6(pk.NetworkHeader().View()) 358 default: 359 panic(fmt.Sprintf("unknown network protocol number %d", netProto)) 360 } 361 } 362 363 // CloneToInbound makes a semi-deep copy of the packet buffer (similar to 364 // Clone) to be used as an inbound packet. 365 // 366 // See PacketBuffer.Data for details about how a packet buffer holds an inbound 367 // packet. 368 func (pk *PacketBuffer) CloneToInbound() *PacketBuffer { 369 newPk := pkPool.Get().(*PacketBuffer) 370 newPk.reset() 371 newPk.buf = pk.buf.Clone() 372 newPk.InitRefs() 373 // Treat unfilled header portion as reserved. 374 newPk.reserved = pk.AvailableHeaderBytes() 375 newPk.tuple = pk.tuple 376 return newPk 377 } 378 379 // DeepCopyForForwarding creates a deep copy of the packet buffer for 380 // forwarding. 381 // 382 // The returned packet buffer will have the network and transport headers 383 // set if the original packet buffer did. 384 func (pk *PacketBuffer) DeepCopyForForwarding(reservedHeaderBytes int) *PacketBuffer { 385 newPk := NewPacketBuffer(PacketBufferOptions{ 386 ReserveHeaderBytes: reservedHeaderBytes, 387 Data: PayloadSince(pk.NetworkHeader()).ToVectorisedView(), 388 IsForwardedPacket: true, 389 }) 390 391 { 392 consumeBytes := pk.NetworkHeader().View().Size() 393 if _, consumed := newPk.NetworkHeader().Consume(consumeBytes); !consumed { 394 panic(fmt.Sprintf("expected to consume network header %d bytes from new packet", consumeBytes)) 395 } 396 newPk.NetworkProtocolNumber = pk.NetworkProtocolNumber 397 } 398 399 { 400 consumeBytes := pk.TransportHeader().View().Size() 401 if _, consumed := newPk.TransportHeader().Consume(consumeBytes); !consumed { 402 panic(fmt.Sprintf("expected to consume transport header %d bytes from new packet", consumeBytes)) 403 } 404 newPk.TransportProtocolNumber = pk.TransportProtocolNumber 405 } 406 407 newPk.tuple = pk.tuple 408 409 return newPk 410 } 411 412 // IncRef increases the reference count on each PacketBuffer 413 // stored in the PacketBufferList. 414 func (pk *PacketBufferList) IncRef() { 415 for pb := pk.Front(); pb != nil; pb = pb.Next() { 416 pb.IncRef() 417 } 418 } 419 420 // DecRef decreases the reference count on each PacketBuffer 421 // stored in the PacketBufferList. 422 func (pk *PacketBufferList) DecRef() { 423 // Using a while-loop here (instead of for-loop) because DecRef() can cause 424 // the pb to be recycled. If it is recycled during execution of this loop, 425 // there is a possibility of a data race during a call to pb.Next(). 426 pb := pk.Front() 427 for pb != nil { 428 next := pb.Next() 429 pb.DecRef() 430 pb = next 431 } 432 } 433 434 // headerInfo stores metadata about a header in a packet. 435 type headerInfo struct { 436 // offset is the offset of the header in pk.buf relative to 437 // pk.buf[pk.reserved]. See the PacketBuffer struct for details. 438 offset int 439 440 // length is the length of this header. 441 length int 442 } 443 444 // PacketHeader is a handle object to a header in the underlying packet. 445 type PacketHeader struct { 446 pk *PacketBuffer 447 typ headerType 448 } 449 450 // View returns the underlying storage of h. 451 func (h PacketHeader) View() tcpipbuffer.View { 452 return h.pk.headerView(h.typ) 453 } 454 455 // Push pushes size bytes in the front of its residing packet, and returns the 456 // backing storage. Callers may only call one of Push or Consume once on each 457 // header in the lifetime of the underlying packet. 458 func (h PacketHeader) Push(size int) tcpipbuffer.View { 459 return h.pk.push(h.typ, size) 460 } 461 462 // Consume moves the first size bytes of the unparsed data portion in the packet 463 // to h, and returns the backing storage. In the case of data is shorter than 464 // size, consumed will be false, and the state of h will not be affected. 465 // Callers may only call one of Push or Consume once on each header in the 466 // lifetime of the underlying packet. 467 func (h PacketHeader) Consume(size int) (v tcpipbuffer.View, consumed bool) { 468 return h.pk.consume(h.typ, size) 469 } 470 471 // PacketData represents the data portion of a PacketBuffer. 472 type PacketData struct { 473 pk *PacketBuffer 474 } 475 476 // PullUp returns a contiguous view of size bytes from the beginning of d. 477 // Callers should not write to or keep the view for later use. 478 func (d PacketData) PullUp(size int) (tcpipbuffer.View, bool) { 479 return d.pk.buf.PullUp(d.pk.dataOffset(), size) 480 } 481 482 // Consume is the same as PullUp except that is additionally consumes the 483 // returned bytes. Subsequent PullUp or Consume will not return these bytes. 484 func (d PacketData) Consume(size int) (tcpipbuffer.View, bool) { 485 v, ok := d.PullUp(size) 486 if ok { 487 d.pk.consumed += size 488 } 489 return v, ok 490 } 491 492 // CapLength reduces d to at most length bytes. 493 func (d PacketData) CapLength(length int) { 494 if length < 0 { 495 panic("length < 0") 496 } 497 if currLength := d.Size(); currLength > length { 498 trim := currLength - length 499 d.pk.buf.Remove(int(d.pk.buf.Size())-trim, trim) 500 } 501 } 502 503 // Views returns the underlying storage of d in a slice of Views. Caller should 504 // not modify the returned slice. 505 func (d PacketData) Views() []tcpipbuffer.View { 506 var views []tcpipbuffer.View 507 offset := d.pk.dataOffset() 508 d.pk.buf.SubApply(offset, int(d.pk.buf.Size())-offset, func(v []byte) { 509 views = append(views, v) 510 }) 511 return views 512 } 513 514 // AppendView appends v into d, taking the ownership of v. 515 func (d PacketData) AppendView(v tcpipbuffer.View) { 516 d.pk.buf.AppendOwned(v) 517 } 518 519 // MergeFragment appends the data portion of frag to dst. It modifies 520 // frag and frag should not be used again. 521 func MergeFragment(dst, frag *PacketBuffer) { 522 frag.buf.TrimFront(int64(frag.dataOffset())) 523 dst.buf.Merge(frag.buf) 524 } 525 526 // ReadFromVV moves at most count bytes from the beginning of srcVV to the end 527 // of d and returns the number of bytes moved. 528 func (d PacketData) ReadFromVV(srcVV *tcpipbuffer.VectorisedView, count int) int { 529 done := 0 530 for _, v := range srcVV.Views() { 531 if len(v) < count { 532 count -= len(v) 533 done += len(v) 534 d.pk.buf.AppendOwned(v) 535 } else { 536 v = v[:count] 537 count -= len(v) 538 done += len(v) 539 d.pk.buf.Append(v) 540 break 541 } 542 } 543 srcVV.TrimFront(done) 544 return done 545 } 546 547 // Size returns the number of bytes in the data payload of the packet. 548 func (d PacketData) Size() int { 549 return int(d.pk.buf.Size()) - d.pk.dataOffset() 550 } 551 552 // AsRange returns a Range representing the current data payload of the packet. 553 func (d PacketData) AsRange() Range { 554 return Range{ 555 pk: d.pk, 556 offset: d.pk.dataOffset(), 557 length: d.Size(), 558 } 559 } 560 561 // ExtractVV returns a VectorisedView of d. This method has the semantic to 562 // destruct the underlying packet, hence the packet cannot be used again. 563 // 564 // This method exists for compatibility between PacketBuffer and VectorisedView. 565 // It may be removed later and should be used with care. 566 func (d PacketData) ExtractVV() tcpipbuffer.VectorisedView { 567 var vv tcpipbuffer.VectorisedView 568 d.pk.buf.SubApply(d.pk.dataOffset(), d.pk.Size(), func(v []byte) { 569 vv.AppendView(v) 570 }) 571 return vv 572 } 573 574 // Range represents a contiguous subportion of a PacketBuffer. 575 type Range struct { 576 pk *PacketBuffer 577 offset int 578 length int 579 } 580 581 // Size returns the number of bytes in r. 582 func (r Range) Size() int { 583 return r.length 584 } 585 586 // SubRange returns a new Range starting at off bytes of r. It returns an empty 587 // range if off is out-of-bounds. 588 func (r Range) SubRange(off int) Range { 589 if off > r.length { 590 return Range{pk: r.pk} 591 } 592 return Range{ 593 pk: r.pk, 594 offset: r.offset + off, 595 length: r.length - off, 596 } 597 } 598 599 // Capped returns a new Range with the same starting point of r and length 600 // capped at max. 601 func (r Range) Capped(max int) Range { 602 if r.length <= max { 603 return r 604 } 605 return Range{ 606 pk: r.pk, 607 offset: r.offset, 608 length: max, 609 } 610 } 611 612 // AsView returns the backing storage of r if possible. It will allocate a new 613 // View if r spans multiple pieces internally. Caller should not write to the 614 // returned View in any way. 615 func (r Range) AsView() tcpipbuffer.View { 616 var allocated bool 617 var v tcpipbuffer.View 618 r.iterate(func(b []byte) { 619 if v == nil { 620 // v has not been assigned, allowing first view to be returned. 621 v = b 622 } else { 623 // v has been assigned. This range spans more than a view, a new view 624 // needs to be allocated. 625 if !allocated { 626 allocated = true 627 all := make([]byte, 0, r.length) 628 all = append(all, v...) 629 v = all 630 } 631 v = append(v, b...) 632 } 633 }) 634 return v 635 } 636 637 // ToOwnedView returns a owned copy of data in r. 638 func (r Range) ToOwnedView() tcpipbuffer.View { 639 if r.length == 0 { 640 return nil 641 } 642 all := make([]byte, 0, r.length) 643 r.iterate(func(b []byte) { 644 all = append(all, b...) 645 }) 646 return all 647 } 648 649 // Checksum calculates the RFC 1071 checksum for the underlying bytes of r. 650 func (r Range) Checksum() uint16 { 651 var c header.Checksumer 652 r.iterate(c.Add) 653 return c.Checksum() 654 } 655 656 // iterate calls fn for each piece in r. fn is always called with a non-empty 657 // slice. 658 func (r Range) iterate(fn func([]byte)) { 659 r.pk.buf.SubApply(r.offset, r.length, fn) 660 } 661 662 // PayloadSince returns packet payload starting from and including a particular 663 // header. 664 // 665 // The returned View is owned by the caller - its backing buffer is separate 666 // from the packet header's underlying packet buffer. 667 func PayloadSince(h PacketHeader) tcpipbuffer.View { 668 offset := h.pk.headerOffset() 669 for i := headerType(0); i < h.typ; i++ { 670 offset += h.pk.headers[i].length 671 } 672 return Range{ 673 pk: h.pk, 674 offset: offset, 675 length: int(h.pk.buf.Size()) - offset, 676 }.ToOwnedView() 677 }