github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/tcpip/stack/packet_buffer.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at //
     6  //     http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package stack
    15  
    16  import (
    17  	"fmt"
    18  
    19  	"github.com/SagerNet/gvisor/pkg/buffer"
    20  	"github.com/SagerNet/gvisor/pkg/sync"
    21  	"github.com/SagerNet/gvisor/pkg/tcpip"
    22  	tcpipbuffer "github.com/SagerNet/gvisor/pkg/tcpip/buffer"
    23  	"github.com/SagerNet/gvisor/pkg/tcpip/header"
    24  )
    25  
// headerType identifies one of the header slots tracked by a PacketBuffer. Its
// values are used directly as indices into PacketBuffer.headers.
type headerType int

// The header slots, in the order the headers appear in a packet. numHeaderType
// is not a header itself; it is the count of the slots above and sizes the
// PacketBuffer.headers array.
const (
	linkHeader headerType = iota
	networkHeader
	transportHeader
	numHeaderType
)
    34  
// PacketBufferOptions specifies options for PacketBuffer creation. It is
// consumed by NewPacketBuffer.
type PacketBufferOptions struct {
	// ReserveHeaderBytes is the number of bytes to reserve for headers. Total
	// number of bytes pushed onto the headers must not exceed this value.
	ReserveHeaderBytes int

	// Data is the initial unparsed data for the new packet. If set, it will be
	// owned by the new packet.
	Data tcpipbuffer.VectorisedView

	// IsForwardedPacket identifies that the PacketBuffer being created is for a
	// forwarded packet. NewPacketBuffer copies it into
	// PacketBuffer.NetworkPacketInfo.IsForwardedPacket.
	IsForwardedPacket bool
}
    49  
// A PacketBuffer contains all the data of a network packet.
//
// As a PacketBuffer traverses up the stack, it may be necessary to pass it to
// multiple endpoints.
//
// The whole packet is expected to be a series of bytes in the following order:
// LinkHeader, NetworkHeader, TransportHeader, and Data. Any of them can be
// empty. Use of PacketBuffer in any other order is unsupported.
//
// PacketBuffer must be created with NewPacketBuffer.
//
// Internal structure: A PacketBuffer holds a pointer to buffer.Buffer, which
// exposes a logically-contiguous byte storage. The underlying storage structure
// is abstracted out, and should not be a concern here for most of the time.
//
// |- reserved ->|
//               |--->| consumed (incoming)
// 0             V    V
// +--------+----+----+--------------------+
// |        |    |    | current data ...   | (buf)
// +--------+----+----+--------------------+
//          ^    |
//          |<---| pushed (outgoing)
//
// When a PacketBuffer is created, a `reserved` header region can be specified,
// into which the stack pushes headers for an outgoing packet. There could be no
// such region for an incoming packet, in which case `reserved` is 0. The value
// of `reserved` never changes in the entire lifetime of the packet.
//
// Outgoing Packet: When a header is pushed, `pushed` gets incremented by the
// pushed length, and the current value is stored for each header. PacketBuffer
// subtracts this value from `reserved` to compute the starting offset of each
// header in `buf`.
//
// Incoming Packet: When a header is consumed (a.k.a. parsed), the current
// `consumed` value is stored for each header, and it gets incremented by the
// consumed length. PacketBuffer adds this value to `reserved` to compute the
// starting offset of each header in `buf`.
type PacketBuffer struct {
	_ sync.NoCopy

	// PacketBufferEntry is used to build an intrusive list of
	// PacketBuffers.
	PacketBufferEntry

	// buf is the underlying buffer for the packet. See struct level docs for
	// details.
	buf *buffer.Buffer
	// reserved is the size of the header region at the front of buf.
	reserved int
	// pushed is the total number of header bytes pushed so far (outgoing).
	pushed int
	// consumed is the total number of header bytes consumed so far (incoming).
	consumed int

	// headers stores metadata about each header, indexed by headerType.
	headers [numHeaderType]headerInfo

	// NetworkProtocolNumber is only valid when NetworkHeader().View().IsEmpty()
	// returns false.
	// TODO(github.com/SagerNet/issue/3574): Remove the separately passed protocol
	// numbers in registration APIs that take a PacketBuffer.
	NetworkProtocolNumber tcpip.NetworkProtocolNumber

	// TransportProtocolNumber is only valid if it is non zero.
	// TODO(github.com/SagerNet/issue/3810): This and the network protocol number should
	// be moved into the headerinfo. This should resolve the validity issue.
	TransportProtocolNumber tcpip.TransportProtocolNumber

	// Hash is the transport layer hash of this packet. A value of zero
	// indicates no valid hash has been set.
	Hash uint32

	// Owner is implemented by task to get the uid and gid.
	// Only set for locally generated packets.
	Owner tcpip.PacketOwner

	// The following fields are only set by the qdisc layer when the packet
	// is added to a queue.
	EgressRoute RouteInfo
	GSOOptions  GSO

	// NatDone indicates if the packet has been manipulated as per NAT
	// iptables rule.
	NatDone bool

	// PktType indicates the SockAddrLink.PacketType of the packet as defined in
	// https://www.man7.org/linux/man-pages/man7/packet.7.html.
	PktType tcpip.PacketType

	// NICID is the ID of the last interface the network packet was handled at.
	NICID tcpip.NICID

	// RXTransportChecksumValidated indicates that transport checksum verification
	// may be safely skipped.
	RXTransportChecksumValidated bool

	// NetworkPacketInfo holds an incoming packet's network-layer information.
	NetworkPacketInfo NetworkPacketInfo
}
   147  
   148  // NewPacketBuffer creates a new PacketBuffer with opts.
   149  func NewPacketBuffer(opts PacketBufferOptions) *PacketBuffer {
   150  	pk := &PacketBuffer{
   151  		buf: &buffer.Buffer{},
   152  	}
   153  	if opts.ReserveHeaderBytes != 0 {
   154  		pk.buf.AppendOwned(make([]byte, opts.ReserveHeaderBytes))
   155  		pk.reserved = opts.ReserveHeaderBytes
   156  	}
   157  	for _, v := range opts.Data.Views() {
   158  		pk.buf.AppendOwned(v)
   159  	}
   160  	if opts.IsForwardedPacket {
   161  		pk.NetworkPacketInfo.IsForwardedPacket = opts.IsForwardedPacket
   162  	}
   163  	return pk
   164  }
   165  
   166  // ReservedHeaderBytes returns the number of bytes initially reserved for
   167  // headers.
   168  func (pk *PacketBuffer) ReservedHeaderBytes() int {
   169  	return pk.reserved
   170  }
   171  
   172  // AvailableHeaderBytes returns the number of bytes currently available for
   173  // headers. This is relevant to PacketHeader.Push method only.
   174  func (pk *PacketBuffer) AvailableHeaderBytes() int {
   175  	return pk.reserved - pk.pushed
   176  }
   177  
   178  // LinkHeader returns the handle to link-layer header.
   179  func (pk *PacketBuffer) LinkHeader() PacketHeader {
   180  	return PacketHeader{
   181  		pk:  pk,
   182  		typ: linkHeader,
   183  	}
   184  }
   185  
   186  // NetworkHeader returns the handle to network-layer header.
   187  func (pk *PacketBuffer) NetworkHeader() PacketHeader {
   188  	return PacketHeader{
   189  		pk:  pk,
   190  		typ: networkHeader,
   191  	}
   192  }
   193  
   194  // TransportHeader returns the handle to transport-layer header.
   195  func (pk *PacketBuffer) TransportHeader() PacketHeader {
   196  	return PacketHeader{
   197  		pk:  pk,
   198  		typ: transportHeader,
   199  	}
   200  }
   201  
   202  // HeaderSize returns the total size of all headers in bytes.
   203  func (pk *PacketBuffer) HeaderSize() int {
   204  	return pk.pushed + pk.consumed
   205  }
   206  
   207  // Size returns the size of packet in bytes.
   208  func (pk *PacketBuffer) Size() int {
   209  	return int(pk.buf.Size()) - pk.headerOffset()
   210  }
   211  
   212  // MemSize returns the estimation size of the pk in memory, including backing
   213  // buffer data.
   214  func (pk *PacketBuffer) MemSize() int {
   215  	return int(pk.buf.Size()) + packetBufferStructSize
   216  }
   217  
   218  // Data returns the handle to data portion of pk.
   219  func (pk *PacketBuffer) Data() PacketData {
   220  	return PacketData{pk: pk}
   221  }
   222  
   223  // Views returns the underlying storage of the whole packet.
   224  func (pk *PacketBuffer) Views() []tcpipbuffer.View {
   225  	var views []tcpipbuffer.View
   226  	offset := pk.headerOffset()
   227  	pk.buf.SubApply(offset, int(pk.buf.Size())-offset, func(v []byte) {
   228  		views = append(views, v)
   229  	})
   230  	return views
   231  }
   232  
   233  func (pk *PacketBuffer) headerOffset() int {
   234  	return pk.reserved - pk.pushed
   235  }
   236  
   237  func (pk *PacketBuffer) headerOffsetOf(typ headerType) int {
   238  	return pk.reserved + pk.headers[typ].offset
   239  }
   240  
   241  func (pk *PacketBuffer) dataOffset() int {
   242  	return pk.reserved + pk.consumed
   243  }
   244  
   245  func (pk *PacketBuffer) push(typ headerType, size int) tcpipbuffer.View {
   246  	h := &pk.headers[typ]
   247  	if h.length > 0 {
   248  		panic(fmt.Sprintf("push(%s, %d) called after previous push", typ, size))
   249  	}
   250  	if pk.pushed+size > pk.reserved {
   251  		panic(fmt.Sprintf("push(%s, %d) overflows; pushed=%d reserved=%d", typ, size, pk.pushed, pk.reserved))
   252  	}
   253  	pk.pushed += size
   254  	h.offset = -pk.pushed
   255  	h.length = size
   256  	return pk.headerView(typ)
   257  }
   258  
   259  func (pk *PacketBuffer) consume(typ headerType, size int) (v tcpipbuffer.View, consumed bool) {
   260  	h := &pk.headers[typ]
   261  	if h.length > 0 {
   262  		panic(fmt.Sprintf("consume must not be called twice: type %s", typ))
   263  	}
   264  	if pk.reserved+pk.consumed+size > int(pk.buf.Size()) {
   265  		return nil, false
   266  	}
   267  	h.offset = pk.consumed
   268  	h.length = size
   269  	pk.consumed += size
   270  	return pk.headerView(typ), true
   271  }
   272  
   273  func (pk *PacketBuffer) headerView(typ headerType) tcpipbuffer.View {
   274  	h := &pk.headers[typ]
   275  	if h.length == 0 {
   276  		return nil
   277  	}
   278  	v, ok := pk.buf.PullUp(pk.headerOffsetOf(typ), h.length)
   279  	if !ok {
   280  		panic("PullUp failed")
   281  	}
   282  	return v
   283  }
   284  
   285  // Clone makes a shallow copy of pk.
   286  //
   287  // Clone should be called in such cases so that no modifications is done to
   288  // underlying packet payload.
   289  func (pk *PacketBuffer) Clone() *PacketBuffer {
   290  	return &PacketBuffer{
   291  		PacketBufferEntry:            pk.PacketBufferEntry,
   292  		buf:                          pk.buf,
   293  		reserved:                     pk.reserved,
   294  		pushed:                       pk.pushed,
   295  		consumed:                     pk.consumed,
   296  		headers:                      pk.headers,
   297  		Hash:                         pk.Hash,
   298  		Owner:                        pk.Owner,
   299  		GSOOptions:                   pk.GSOOptions,
   300  		NetworkProtocolNumber:        pk.NetworkProtocolNumber,
   301  		NatDone:                      pk.NatDone,
   302  		TransportProtocolNumber:      pk.TransportProtocolNumber,
   303  		PktType:                      pk.PktType,
   304  		NICID:                        pk.NICID,
   305  		RXTransportChecksumValidated: pk.RXTransportChecksumValidated,
   306  		NetworkPacketInfo:            pk.NetworkPacketInfo,
   307  	}
   308  }
   309  
   310  // Network returns the network header as a header.Network.
   311  //
   312  // Network should only be called when NetworkHeader has been set.
   313  func (pk *PacketBuffer) Network() header.Network {
   314  	switch netProto := pk.NetworkProtocolNumber; netProto {
   315  	case header.IPv4ProtocolNumber:
   316  		return header.IPv4(pk.NetworkHeader().View())
   317  	case header.IPv6ProtocolNumber:
   318  		return header.IPv6(pk.NetworkHeader().View())
   319  	default:
   320  		panic(fmt.Sprintf("unknown network protocol number %d", netProto))
   321  	}
   322  }
   323  
   324  // CloneToInbound makes a shallow copy of the packet buffer to be used as an
   325  // inbound packet.
   326  //
   327  // See PacketBuffer.Data for details about how a packet buffer holds an inbound
   328  // packet.
   329  func (pk *PacketBuffer) CloneToInbound() *PacketBuffer {
   330  	newPk := &PacketBuffer{
   331  		buf: pk.buf,
   332  		// Treat unfilled header portion as reserved.
   333  		reserved: pk.AvailableHeaderBytes(),
   334  	}
   335  	// TODO(github.com/SagerNet/issue/5696): reimplement conntrack so that no need to
   336  	// maintain this flag in the packet. Currently conntrack needs this flag to
   337  	// tell if a noop connection should be inserted at Input hook. Once conntrack
   338  	// redefines the manipulation field as mutable, we won't need the special noop
   339  	// connection.
   340  	if pk.NatDone {
   341  		newPk.NatDone = true
   342  	}
   343  	return newPk
   344  }
   345  
// headerInfo stores metadata about a header in a packet.
type headerInfo struct {
	// offset is the offset of the header in pk.buf relative to
	// pk.buf[pk.reserved]. See the PacketBuffer struct for details. It is
	// negative for pushed headers and non-negative for consumed ones.
	offset int

	// length is the length of this header. Zero means the header has not been
	// set; push and consume both refuse to overwrite a non-zero length.
	length int
}
   355  
// PacketHeader is a handle object to a header in the underlying packet.
type PacketHeader struct {
	// pk is the packet this handle refers to.
	pk *PacketBuffer
	// typ selects which of pk's header slots this handle operates on.
	typ headerType
}
   361  
   362  // View returns the underlying storage of h.
   363  func (h PacketHeader) View() tcpipbuffer.View {
   364  	return h.pk.headerView(h.typ)
   365  }
   366  
   367  // Push pushes size bytes in the front of its residing packet, and returns the
   368  // backing storage. Callers may only call one of Push or Consume once on each
   369  // header in the lifetime of the underlying packet.
   370  func (h PacketHeader) Push(size int) tcpipbuffer.View {
   371  	return h.pk.push(h.typ, size)
   372  }
   373  
   374  // Consume moves the first size bytes of the unparsed data portion in the packet
   375  // to h, and returns the backing storage. In the case of data is shorter than
   376  // size, consumed will be false, and the state of h will not be affected.
   377  // Callers may only call one of Push or Consume once on each header in the
   378  // lifetime of the underlying packet.
   379  func (h PacketHeader) Consume(size int) (v tcpipbuffer.View, consumed bool) {
   380  	return h.pk.consume(h.typ, size)
   381  }
   382  
// PacketData represents the data portion of a PacketBuffer, i.e. the bytes
// that follow the reserved region and any consumed headers.
type PacketData struct {
	// pk is the packet whose data portion this handle operates on.
	pk *PacketBuffer
}
   387  
   388  // PullUp returns a contiguous view of size bytes from the beginning of d.
   389  // Callers should not write to or keep the view for later use.
   390  func (d PacketData) PullUp(size int) (tcpipbuffer.View, bool) {
   391  	return d.pk.buf.PullUp(d.pk.dataOffset(), size)
   392  }
   393  
   394  // DeleteFront removes count from the beginning of d. It panics if count >
   395  // d.Size(). All backing storage references after the front of the d are
   396  // invalidated.
   397  func (d PacketData) DeleteFront(count int) {
   398  	if !d.pk.buf.Remove(d.pk.dataOffset(), count) {
   399  		panic("count > d.Size()")
   400  	}
   401  }
   402  
   403  // CapLength reduces d to at most length bytes.
   404  func (d PacketData) CapLength(length int) {
   405  	if length < 0 {
   406  		panic("length < 0")
   407  	}
   408  	if currLength := d.Size(); currLength > length {
   409  		trim := currLength - length
   410  		d.pk.buf.Remove(int(d.pk.buf.Size())-trim, trim)
   411  	}
   412  }
   413  
   414  // Views returns the underlying storage of d in a slice of Views. Caller should
   415  // not modify the returned slice.
   416  func (d PacketData) Views() []tcpipbuffer.View {
   417  	var views []tcpipbuffer.View
   418  	offset := d.pk.dataOffset()
   419  	d.pk.buf.SubApply(offset, int(d.pk.buf.Size())-offset, func(v []byte) {
   420  		views = append(views, v)
   421  	})
   422  	return views
   423  }
   424  
   425  // AppendView appends v into d, taking the ownership of v.
   426  func (d PacketData) AppendView(v tcpipbuffer.View) {
   427  	d.pk.buf.AppendOwned(v)
   428  }
   429  
   430  // MergeFragment appends the data portion of frag to dst. It takes ownership of
   431  // frag and frag should not be used again.
   432  func MergeFragment(dst, frag *PacketBuffer) {
   433  	frag.buf.TrimFront(int64(frag.dataOffset()))
   434  	dst.buf.Merge(frag.buf)
   435  }
   436  
   437  // ReadFromVV moves at most count bytes from the beginning of srcVV to the end
   438  // of d and returns the number of bytes moved.
   439  func (d PacketData) ReadFromVV(srcVV *tcpipbuffer.VectorisedView, count int) int {
   440  	done := 0
   441  	for _, v := range srcVV.Views() {
   442  		if len(v) < count {
   443  			count -= len(v)
   444  			done += len(v)
   445  			d.pk.buf.AppendOwned(v)
   446  		} else {
   447  			v = v[:count]
   448  			count -= len(v)
   449  			done += len(v)
   450  			d.pk.buf.Append(v)
   451  			break
   452  		}
   453  	}
   454  	srcVV.TrimFront(done)
   455  	return done
   456  }
   457  
   458  // Size returns the number of bytes in the data payload of the packet.
   459  func (d PacketData) Size() int {
   460  	return int(d.pk.buf.Size()) - d.pk.dataOffset()
   461  }
   462  
   463  // AsRange returns a Range representing the current data payload of the packet.
   464  func (d PacketData) AsRange() Range {
   465  	return Range{
   466  		pk:     d.pk,
   467  		offset: d.pk.dataOffset(),
   468  		length: d.Size(),
   469  	}
   470  }
   471  
   472  // ExtractVV returns a VectorisedView of d. This method has the semantic to
   473  // destruct the underlying packet, hence the packet cannot be used again.
   474  //
   475  // This method exists for compatibility between PacketBuffer and VectorisedView.
   476  // It may be removed later and should be used with care.
   477  func (d PacketData) ExtractVV() tcpipbuffer.VectorisedView {
   478  	var vv tcpipbuffer.VectorisedView
   479  	d.pk.buf.SubApply(d.pk.dataOffset(), d.pk.Size(), func(v []byte) {
   480  		vv.AppendView(v)
   481  	})
   482  	return vv
   483  }
   484  
// Range represents a contiguous subportion of a PacketBuffer.
type Range struct {
	// pk is the packet the range refers to.
	pk *PacketBuffer
	// offset is the start of the range, as an offset into pk.buf.
	offset int
	// length is the number of bytes in the range.
	length int
}
   491  
   492  // Size returns the number of bytes in r.
   493  func (r Range) Size() int {
   494  	return r.length
   495  }
   496  
   497  // SubRange returns a new Range starting at off bytes of r. It returns an empty
   498  // range if off is out-of-bounds.
   499  func (r Range) SubRange(off int) Range {
   500  	if off > r.length {
   501  		return Range{pk: r.pk}
   502  	}
   503  	return Range{
   504  		pk:     r.pk,
   505  		offset: r.offset + off,
   506  		length: r.length - off,
   507  	}
   508  }
   509  
   510  // Capped returns a new Range with the same starting point of r and length
   511  // capped at max.
   512  func (r Range) Capped(max int) Range {
   513  	if r.length <= max {
   514  		return r
   515  	}
   516  	return Range{
   517  		pk:     r.pk,
   518  		offset: r.offset,
   519  		length: max,
   520  	}
   521  }
   522  
   523  // AsView returns the backing storage of r if possible. It will allocate a new
   524  // View if r spans multiple pieces internally. Caller should not write to the
   525  // returned View in any way.
   526  func (r Range) AsView() tcpipbuffer.View {
   527  	var allocated bool
   528  	var v tcpipbuffer.View
   529  	r.iterate(func(b []byte) {
   530  		if v == nil {
   531  			// v has not been assigned, allowing first view to be returned.
   532  			v = b
   533  		} else {
   534  			// v has been assigned. This range spans more than a view, a new view
   535  			// needs to be allocated.
   536  			if !allocated {
   537  				allocated = true
   538  				all := make([]byte, 0, r.length)
   539  				all = append(all, v...)
   540  				v = all
   541  			}
   542  			v = append(v, b...)
   543  		}
   544  	})
   545  	return v
   546  }
   547  
   548  // ToOwnedView returns a owned copy of data in r.
   549  func (r Range) ToOwnedView() tcpipbuffer.View {
   550  	if r.length == 0 {
   551  		return nil
   552  	}
   553  	all := make([]byte, 0, r.length)
   554  	r.iterate(func(b []byte) {
   555  		all = append(all, b...)
   556  	})
   557  	return all
   558  }
   559  
   560  // Checksum calculates the RFC 1071 checksum for the underlying bytes of r.
   561  func (r Range) Checksum() uint16 {
   562  	var c header.Checksumer
   563  	r.iterate(c.Add)
   564  	return c.Checksum()
   565  }
   566  
   567  // iterate calls fn for each piece in r. fn is always called with a non-empty
   568  // slice.
   569  func (r Range) iterate(fn func([]byte)) {
   570  	r.pk.buf.SubApply(r.offset, r.length, fn)
   571  }
   572  
   573  // PayloadSince returns packet payload starting from and including a particular
   574  // header.
   575  //
   576  // The returned View is owned by the caller - its backing buffer is separate
   577  // from the packet header's underlying packet buffer.
   578  func PayloadSince(h PacketHeader) tcpipbuffer.View {
   579  	offset := h.pk.headerOffset()
   580  	for i := headerType(0); i < h.typ; i++ {
   581  		offset += h.pk.headers[i].length
   582  	}
   583  	return Range{
   584  		pk:     h.pk,
   585  		offset: offset,
   586  		length: int(h.pk.buf.Size()) - offset,
   587  	}.ToOwnedView()
   588  }