// Source: github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/tcpip/stack/packet_buffer.go

// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package stack

import (
	"fmt"

	"github.com/SagerNet/gvisor/pkg/buffer"
	"github.com/SagerNet/gvisor/pkg/sync"
	"github.com/SagerNet/gvisor/pkg/tcpip"
	tcpipbuffer "github.com/SagerNet/gvisor/pkg/tcpip/buffer"
	"github.com/SagerNet/gvisor/pkg/tcpip/header"
)

// headerType identifies one of the headers tracked by a PacketBuffer. Its
// values are used as indices into PacketBuffer.headers.
type headerType int

const (
	linkHeader headerType = iota
	networkHeader
	transportHeader
	// numHeaderType is the number of header types; it sizes
	// PacketBuffer.headers and is not itself a valid header.
	numHeaderType
)

// PacketBufferOptions specifies options for PacketBuffer creation.
type PacketBufferOptions struct {
	// ReserveHeaderBytes is the number of bytes to reserve for headers. Total
	// number of bytes pushed onto the headers must not exceed this value.
	ReserveHeaderBytes int

	// Data is the initial unparsed data for the new packet. If set, it will be
	// owned by the new packet.
	Data tcpipbuffer.VectorisedView

	// IsForwardedPacket identifies that the PacketBuffer being created is for a
	// forwarded packet.
	IsForwardedPacket bool
}

// A PacketBuffer contains all the data of a network packet.
//
// As a PacketBuffer traverses up the stack, it may be necessary to pass it to
// multiple endpoints.
//
// The whole packet is expected to be a series of bytes in the following order:
// LinkHeader, NetworkHeader, TransportHeader, and Data. Any of them can be
// empty. Use of PacketBuffer in any other order is unsupported.
//
// PacketBuffer must be created with NewPacketBuffer.
//
// Internal structure: A PacketBuffer holds a pointer to buffer.Buffer, which
// exposes a logically-contiguous byte storage. The underlying storage structure
// is abstracted out, and should not be a concern here for most of the time.
//
//	|- reserved ->|
//	              |--->| consumed (incoming)
//	0             V    V
//	+--------+----+----+--------------------+
//	|        |    |    | current data ...   | (buf)
//	+--------+----+----+--------------------+
//	         ^    |
//	         |<---| pushed (outgoing)
//
// When a PacketBuffer is created, a `reserved` header region can be specified,
// into which the stack pushes headers for an outgoing packet. There could be no
// such region for an incoming packet, in which case `reserved` is 0. The value
// of `reserved` never changes in the entire lifetime of the packet.
//
// Outgoing Packet: When a header is pushed, `pushed` gets incremented by the
// pushed length, and the current value is stored for each header. PacketBuffer
// subtracts this value from `reserved` to compute the starting offset of each
// header in `buf`.
//
// Incoming Packet: When a header is consumed (a.k.a. parsed), the current
// `consumed` value is stored for each header, and it gets incremented by the
// consumed length. PacketBuffer adds this value to `reserved` to compute the
// starting offset of each header in `buf`.
type PacketBuffer struct {
	_ sync.NoCopy

	// PacketBufferEntry is used to build an intrusive list of
	// PacketBuffers.
	PacketBufferEntry

	// buf is the underlying buffer for the packet. See struct level docs for
	// details.
	buf *buffer.Buffer
	// reserved is the size of the header region at the front of buf.
	reserved int
	// pushed is the total number of header bytes pushed so far.
	pushed int
	// consumed is the total number of header bytes consumed so far.
	consumed int

	// headers stores metadata about each header, indexed by headerType.
	headers [numHeaderType]headerInfo

	// NetworkProtocolNumber is only valid when NetworkHeader().View().IsEmpty()
	// returns false.
	// TODO(github.com/SagerNet/issue/3574): Remove the separately passed protocol
	// numbers in registration APIs that take a PacketBuffer.
	NetworkProtocolNumber tcpip.NetworkProtocolNumber

	// TransportProtocolNumber is only valid if it is non-zero.
	// TODO(github.com/SagerNet/issue/3810): This and the network protocol number should
	// be moved into the headerinfo. This should resolve the validity issue.
	TransportProtocolNumber tcpip.TransportProtocolNumber

	// Hash is the transport layer hash of this packet. A value of zero
	// indicates no valid hash has been set.
	Hash uint32

	// Owner is implemented by task to get the uid and gid.
	// Only set for locally generated packets.
	Owner tcpip.PacketOwner

	// The following fields are only set by the qdisc layer when the packet
	// is added to a queue.
	EgressRoute RouteInfo
	GSOOptions  GSO

	// NatDone indicates if the packet has been manipulated as per NAT
	// iptables rule.
	NatDone bool

	// PktType indicates the SockAddrLink.PacketType of the packet as defined in
	// https://www.man7.org/linux/man-pages/man7/packet.7.html.
	PktType tcpip.PacketType

	// NICID is the ID of the last interface the network packet was handled at.
	NICID tcpip.NICID

	// RXTransportChecksumValidated indicates that transport checksum verification
	// may be safely skipped.
	RXTransportChecksumValidated bool

	// NetworkPacketInfo holds an incoming packet's network-layer information.
	NetworkPacketInfo NetworkPacketInfo
}

// NewPacketBuffer creates a new PacketBuffer with opts.
149 func NewPacketBuffer(opts PacketBufferOptions) *PacketBuffer { 150 pk := &PacketBuffer{ 151 buf: &buffer.Buffer{}, 152 } 153 if opts.ReserveHeaderBytes != 0 { 154 pk.buf.AppendOwned(make([]byte, opts.ReserveHeaderBytes)) 155 pk.reserved = opts.ReserveHeaderBytes 156 } 157 for _, v := range opts.Data.Views() { 158 pk.buf.AppendOwned(v) 159 } 160 if opts.IsForwardedPacket { 161 pk.NetworkPacketInfo.IsForwardedPacket = opts.IsForwardedPacket 162 } 163 return pk 164 } 165 166 // ReservedHeaderBytes returns the number of bytes initially reserved for 167 // headers. 168 func (pk *PacketBuffer) ReservedHeaderBytes() int { 169 return pk.reserved 170 } 171 172 // AvailableHeaderBytes returns the number of bytes currently available for 173 // headers. This is relevant to PacketHeader.Push method only. 174 func (pk *PacketBuffer) AvailableHeaderBytes() int { 175 return pk.reserved - pk.pushed 176 } 177 178 // LinkHeader returns the handle to link-layer header. 179 func (pk *PacketBuffer) LinkHeader() PacketHeader { 180 return PacketHeader{ 181 pk: pk, 182 typ: linkHeader, 183 } 184 } 185 186 // NetworkHeader returns the handle to network-layer header. 187 func (pk *PacketBuffer) NetworkHeader() PacketHeader { 188 return PacketHeader{ 189 pk: pk, 190 typ: networkHeader, 191 } 192 } 193 194 // TransportHeader returns the handle to transport-layer header. 195 func (pk *PacketBuffer) TransportHeader() PacketHeader { 196 return PacketHeader{ 197 pk: pk, 198 typ: transportHeader, 199 } 200 } 201 202 // HeaderSize returns the total size of all headers in bytes. 203 func (pk *PacketBuffer) HeaderSize() int { 204 return pk.pushed + pk.consumed 205 } 206 207 // Size returns the size of packet in bytes. 208 func (pk *PacketBuffer) Size() int { 209 return int(pk.buf.Size()) - pk.headerOffset() 210 } 211 212 // MemSize returns the estimation size of the pk in memory, including backing 213 // buffer data. 
214 func (pk *PacketBuffer) MemSize() int { 215 return int(pk.buf.Size()) + packetBufferStructSize 216 } 217 218 // Data returns the handle to data portion of pk. 219 func (pk *PacketBuffer) Data() PacketData { 220 return PacketData{pk: pk} 221 } 222 223 // Views returns the underlying storage of the whole packet. 224 func (pk *PacketBuffer) Views() []tcpipbuffer.View { 225 var views []tcpipbuffer.View 226 offset := pk.headerOffset() 227 pk.buf.SubApply(offset, int(pk.buf.Size())-offset, func(v []byte) { 228 views = append(views, v) 229 }) 230 return views 231 } 232 233 func (pk *PacketBuffer) headerOffset() int { 234 return pk.reserved - pk.pushed 235 } 236 237 func (pk *PacketBuffer) headerOffsetOf(typ headerType) int { 238 return pk.reserved + pk.headers[typ].offset 239 } 240 241 func (pk *PacketBuffer) dataOffset() int { 242 return pk.reserved + pk.consumed 243 } 244 245 func (pk *PacketBuffer) push(typ headerType, size int) tcpipbuffer.View { 246 h := &pk.headers[typ] 247 if h.length > 0 { 248 panic(fmt.Sprintf("push(%s, %d) called after previous push", typ, size)) 249 } 250 if pk.pushed+size > pk.reserved { 251 panic(fmt.Sprintf("push(%s, %d) overflows; pushed=%d reserved=%d", typ, size, pk.pushed, pk.reserved)) 252 } 253 pk.pushed += size 254 h.offset = -pk.pushed 255 h.length = size 256 return pk.headerView(typ) 257 } 258 259 func (pk *PacketBuffer) consume(typ headerType, size int) (v tcpipbuffer.View, consumed bool) { 260 h := &pk.headers[typ] 261 if h.length > 0 { 262 panic(fmt.Sprintf("consume must not be called twice: type %s", typ)) 263 } 264 if pk.reserved+pk.consumed+size > int(pk.buf.Size()) { 265 return nil, false 266 } 267 h.offset = pk.consumed 268 h.length = size 269 pk.consumed += size 270 return pk.headerView(typ), true 271 } 272 273 func (pk *PacketBuffer) headerView(typ headerType) tcpipbuffer.View { 274 h := &pk.headers[typ] 275 if h.length == 0 { 276 return nil 277 } 278 v, ok := pk.buf.PullUp(pk.headerOffsetOf(typ), h.length) 279 if !ok 
{ 280 panic("PullUp failed") 281 } 282 return v 283 } 284 285 // Clone makes a shallow copy of pk. 286 // 287 // Clone should be called in such cases so that no modifications is done to 288 // underlying packet payload. 289 func (pk *PacketBuffer) Clone() *PacketBuffer { 290 return &PacketBuffer{ 291 PacketBufferEntry: pk.PacketBufferEntry, 292 buf: pk.buf, 293 reserved: pk.reserved, 294 pushed: pk.pushed, 295 consumed: pk.consumed, 296 headers: pk.headers, 297 Hash: pk.Hash, 298 Owner: pk.Owner, 299 GSOOptions: pk.GSOOptions, 300 NetworkProtocolNumber: pk.NetworkProtocolNumber, 301 NatDone: pk.NatDone, 302 TransportProtocolNumber: pk.TransportProtocolNumber, 303 PktType: pk.PktType, 304 NICID: pk.NICID, 305 RXTransportChecksumValidated: pk.RXTransportChecksumValidated, 306 NetworkPacketInfo: pk.NetworkPacketInfo, 307 } 308 } 309 310 // Network returns the network header as a header.Network. 311 // 312 // Network should only be called when NetworkHeader has been set. 313 func (pk *PacketBuffer) Network() header.Network { 314 switch netProto := pk.NetworkProtocolNumber; netProto { 315 case header.IPv4ProtocolNumber: 316 return header.IPv4(pk.NetworkHeader().View()) 317 case header.IPv6ProtocolNumber: 318 return header.IPv6(pk.NetworkHeader().View()) 319 default: 320 panic(fmt.Sprintf("unknown network protocol number %d", netProto)) 321 } 322 } 323 324 // CloneToInbound makes a shallow copy of the packet buffer to be used as an 325 // inbound packet. 326 // 327 // See PacketBuffer.Data for details about how a packet buffer holds an inbound 328 // packet. 329 func (pk *PacketBuffer) CloneToInbound() *PacketBuffer { 330 newPk := &PacketBuffer{ 331 buf: pk.buf, 332 // Treat unfilled header portion as reserved. 333 reserved: pk.AvailableHeaderBytes(), 334 } 335 // TODO(github.com/SagerNet/issue/5696): reimplement conntrack so that no need to 336 // maintain this flag in the packet. 
Currently conntrack needs this flag to 337 // tell if a noop connection should be inserted at Input hook. Once conntrack 338 // redefines the manipulation field as mutable, we won't need the special noop 339 // connection. 340 if pk.NatDone { 341 newPk.NatDone = true 342 } 343 return newPk 344 } 345 346 // headerInfo stores metadata about a header in a packet. 347 type headerInfo struct { 348 // offset is the offset of the header in pk.buf relative to 349 // pk.buf[pk.reserved]. See the PacketBuffer struct for details. 350 offset int 351 352 // length is the length of this header. 353 length int 354 } 355 356 // PacketHeader is a handle object to a header in the underlying packet. 357 type PacketHeader struct { 358 pk *PacketBuffer 359 typ headerType 360 } 361 362 // View returns the underlying storage of h. 363 func (h PacketHeader) View() tcpipbuffer.View { 364 return h.pk.headerView(h.typ) 365 } 366 367 // Push pushes size bytes in the front of its residing packet, and returns the 368 // backing storage. Callers may only call one of Push or Consume once on each 369 // header in the lifetime of the underlying packet. 370 func (h PacketHeader) Push(size int) tcpipbuffer.View { 371 return h.pk.push(h.typ, size) 372 } 373 374 // Consume moves the first size bytes of the unparsed data portion in the packet 375 // to h, and returns the backing storage. In the case of data is shorter than 376 // size, consumed will be false, and the state of h will not be affected. 377 // Callers may only call one of Push or Consume once on each header in the 378 // lifetime of the underlying packet. 379 func (h PacketHeader) Consume(size int) (v tcpipbuffer.View, consumed bool) { 380 return h.pk.consume(h.typ, size) 381 } 382 383 // PacketData represents the data portion of a PacketBuffer. 384 type PacketData struct { 385 pk *PacketBuffer 386 } 387 388 // PullUp returns a contiguous view of size bytes from the beginning of d. 
389 // Callers should not write to or keep the view for later use. 390 func (d PacketData) PullUp(size int) (tcpipbuffer.View, bool) { 391 return d.pk.buf.PullUp(d.pk.dataOffset(), size) 392 } 393 394 // DeleteFront removes count from the beginning of d. It panics if count > 395 // d.Size(). All backing storage references after the front of the d are 396 // invalidated. 397 func (d PacketData) DeleteFront(count int) { 398 if !d.pk.buf.Remove(d.pk.dataOffset(), count) { 399 panic("count > d.Size()") 400 } 401 } 402 403 // CapLength reduces d to at most length bytes. 404 func (d PacketData) CapLength(length int) { 405 if length < 0 { 406 panic("length < 0") 407 } 408 if currLength := d.Size(); currLength > length { 409 trim := currLength - length 410 d.pk.buf.Remove(int(d.pk.buf.Size())-trim, trim) 411 } 412 } 413 414 // Views returns the underlying storage of d in a slice of Views. Caller should 415 // not modify the returned slice. 416 func (d PacketData) Views() []tcpipbuffer.View { 417 var views []tcpipbuffer.View 418 offset := d.pk.dataOffset() 419 d.pk.buf.SubApply(offset, int(d.pk.buf.Size())-offset, func(v []byte) { 420 views = append(views, v) 421 }) 422 return views 423 } 424 425 // AppendView appends v into d, taking the ownership of v. 426 func (d PacketData) AppendView(v tcpipbuffer.View) { 427 d.pk.buf.AppendOwned(v) 428 } 429 430 // MergeFragment appends the data portion of frag to dst. It takes ownership of 431 // frag and frag should not be used again. 432 func MergeFragment(dst, frag *PacketBuffer) { 433 frag.buf.TrimFront(int64(frag.dataOffset())) 434 dst.buf.Merge(frag.buf) 435 } 436 437 // ReadFromVV moves at most count bytes from the beginning of srcVV to the end 438 // of d and returns the number of bytes moved. 
439 func (d PacketData) ReadFromVV(srcVV *tcpipbuffer.VectorisedView, count int) int { 440 done := 0 441 for _, v := range srcVV.Views() { 442 if len(v) < count { 443 count -= len(v) 444 done += len(v) 445 d.pk.buf.AppendOwned(v) 446 } else { 447 v = v[:count] 448 count -= len(v) 449 done += len(v) 450 d.pk.buf.Append(v) 451 break 452 } 453 } 454 srcVV.TrimFront(done) 455 return done 456 } 457 458 // Size returns the number of bytes in the data payload of the packet. 459 func (d PacketData) Size() int { 460 return int(d.pk.buf.Size()) - d.pk.dataOffset() 461 } 462 463 // AsRange returns a Range representing the current data payload of the packet. 464 func (d PacketData) AsRange() Range { 465 return Range{ 466 pk: d.pk, 467 offset: d.pk.dataOffset(), 468 length: d.Size(), 469 } 470 } 471 472 // ExtractVV returns a VectorisedView of d. This method has the semantic to 473 // destruct the underlying packet, hence the packet cannot be used again. 474 // 475 // This method exists for compatibility between PacketBuffer and VectorisedView. 476 // It may be removed later and should be used with care. 477 func (d PacketData) ExtractVV() tcpipbuffer.VectorisedView { 478 var vv tcpipbuffer.VectorisedView 479 d.pk.buf.SubApply(d.pk.dataOffset(), d.pk.Size(), func(v []byte) { 480 vv.AppendView(v) 481 }) 482 return vv 483 } 484 485 // Range represents a contiguous subportion of a PacketBuffer. 486 type Range struct { 487 pk *PacketBuffer 488 offset int 489 length int 490 } 491 492 // Size returns the number of bytes in r. 493 func (r Range) Size() int { 494 return r.length 495 } 496 497 // SubRange returns a new Range starting at off bytes of r. It returns an empty 498 // range if off is out-of-bounds. 
499 func (r Range) SubRange(off int) Range { 500 if off > r.length { 501 return Range{pk: r.pk} 502 } 503 return Range{ 504 pk: r.pk, 505 offset: r.offset + off, 506 length: r.length - off, 507 } 508 } 509 510 // Capped returns a new Range with the same starting point of r and length 511 // capped at max. 512 func (r Range) Capped(max int) Range { 513 if r.length <= max { 514 return r 515 } 516 return Range{ 517 pk: r.pk, 518 offset: r.offset, 519 length: max, 520 } 521 } 522 523 // AsView returns the backing storage of r if possible. It will allocate a new 524 // View if r spans multiple pieces internally. Caller should not write to the 525 // returned View in any way. 526 func (r Range) AsView() tcpipbuffer.View { 527 var allocated bool 528 var v tcpipbuffer.View 529 r.iterate(func(b []byte) { 530 if v == nil { 531 // v has not been assigned, allowing first view to be returned. 532 v = b 533 } else { 534 // v has been assigned. This range spans more than a view, a new view 535 // needs to be allocated. 536 if !allocated { 537 allocated = true 538 all := make([]byte, 0, r.length) 539 all = append(all, v...) 540 v = all 541 } 542 v = append(v, b...) 543 } 544 }) 545 return v 546 } 547 548 // ToOwnedView returns a owned copy of data in r. 549 func (r Range) ToOwnedView() tcpipbuffer.View { 550 if r.length == 0 { 551 return nil 552 } 553 all := make([]byte, 0, r.length) 554 r.iterate(func(b []byte) { 555 all = append(all, b...) 556 }) 557 return all 558 } 559 560 // Checksum calculates the RFC 1071 checksum for the underlying bytes of r. 561 func (r Range) Checksum() uint16 { 562 var c header.Checksumer 563 r.iterate(c.Add) 564 return c.Checksum() 565 } 566 567 // iterate calls fn for each piece in r. fn is always called with a non-empty 568 // slice. 569 func (r Range) iterate(fn func([]byte)) { 570 r.pk.buf.SubApply(r.offset, r.length, fn) 571 } 572 573 // PayloadSince returns packet payload starting from and including a particular 574 // header. 
575 // 576 // The returned View is owned by the caller - its backing buffer is separate 577 // from the packet header's underlying packet buffer. 578 func PayloadSince(h PacketHeader) tcpipbuffer.View { 579 offset := h.pk.headerOffset() 580 for i := headerType(0); i < h.typ; i++ { 581 offset += h.pk.headers[i].length 582 } 583 return Range{ 584 pk: h.pk, 585 offset: offset, 586 length: int(h.pk.buf.Size()) - offset, 587 }.ToOwnedView() 588 }