github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/tcpip/network/ipv4/icmp.go (about)

     1  // Copyright 2021 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package ipv4
    16  
    17  import (
    18  	"fmt"
    19  
    20  	"github.com/SagerNet/gvisor/pkg/tcpip"
    21  	"github.com/SagerNet/gvisor/pkg/tcpip/buffer"
    22  	"github.com/SagerNet/gvisor/pkg/tcpip/header"
    23  	"github.com/SagerNet/gvisor/pkg/tcpip/stack"
    24  )
    25  
    26  // icmpv4DestinationUnreachableSockError is a general ICMPv4 Destination
    27  // Unreachable error.
    28  //
    29  // +stateify savable
    30  type icmpv4DestinationUnreachableSockError struct{}
    31  
    32  // Origin implements tcpip.SockErrorCause.
    33  func (*icmpv4DestinationUnreachableSockError) Origin() tcpip.SockErrOrigin {
    34  	return tcpip.SockExtErrorOriginICMP
    35  }
    36  
    37  // Type implements tcpip.SockErrorCause.
    38  func (*icmpv4DestinationUnreachableSockError) Type() uint8 {
    39  	return uint8(header.ICMPv4DstUnreachable)
    40  }
    41  
    42  // Info implements tcpip.SockErrorCause.
    43  func (*icmpv4DestinationUnreachableSockError) Info() uint32 {
    44  	return 0
    45  }
    46  
    47  var _ stack.TransportError = (*icmpv4DestinationHostUnreachableSockError)(nil)
    48  
    49  // icmpv4DestinationHostUnreachableSockError is an ICMPv4 Destination Host
    50  // Unreachable error.
    51  //
    52  // It indicates that a packet was not able to reach the destination host.
    53  //
    54  // +stateify savable
    55  type icmpv4DestinationHostUnreachableSockError struct {
    56  	icmpv4DestinationUnreachableSockError
    57  }
    58  
    59  // Code implements tcpip.SockErrorCause.
    60  func (*icmpv4DestinationHostUnreachableSockError) Code() uint8 {
    61  	return uint8(header.ICMPv4HostUnreachable)
    62  }
    63  
    64  // Kind implements stack.TransportError.
    65  func (*icmpv4DestinationHostUnreachableSockError) Kind() stack.TransportErrorKind {
    66  	return stack.DestinationHostUnreachableTransportError
    67  }
    68  
    69  var _ stack.TransportError = (*icmpv4DestinationPortUnreachableSockError)(nil)
    70  
    71  // icmpv4DestinationPortUnreachableSockError is an ICMPv4 Destination Port
    72  // Unreachable error.
    73  //
    74  // It indicates that a packet reached the destination host, but the transport
    75  // protocol was not active on the destination port.
    76  //
    77  // +stateify savable
    78  type icmpv4DestinationPortUnreachableSockError struct {
    79  	icmpv4DestinationUnreachableSockError
    80  }
    81  
    82  // Code implements tcpip.SockErrorCause.
    83  func (*icmpv4DestinationPortUnreachableSockError) Code() uint8 {
    84  	return uint8(header.ICMPv4PortUnreachable)
    85  }
    86  
    87  // Kind implements stack.TransportError.
    88  func (*icmpv4DestinationPortUnreachableSockError) Kind() stack.TransportErrorKind {
    89  	return stack.DestinationPortUnreachableTransportError
    90  }
    91  
    92  var _ stack.TransportError = (*icmpv4FragmentationNeededSockError)(nil)
    93  
    94  // icmpv4FragmentationNeededSockError is an ICMPv4 Destination Unreachable error
    95  // due to fragmentation being required but the packet was set to not be
    96  // fragmented.
    97  //
    98  // It indicates that a link exists on the path to the destination with an MTU
    99  // that is too small to carry the packet.
   100  //
   101  // +stateify savable
   102  type icmpv4FragmentationNeededSockError struct {
   103  	icmpv4DestinationUnreachableSockError
   104  
   105  	mtu uint32
   106  }
   107  
   108  // Code implements tcpip.SockErrorCause.
   109  func (*icmpv4FragmentationNeededSockError) Code() uint8 {
   110  	return uint8(header.ICMPv4FragmentationNeeded)
   111  }
   112  
   113  // Info implements tcpip.SockErrorCause.
   114  func (e *icmpv4FragmentationNeededSockError) Info() uint32 {
   115  	return e.mtu
   116  }
   117  
   118  // Kind implements stack.TransportError.
   119  func (*icmpv4FragmentationNeededSockError) Kind() stack.TransportErrorKind {
   120  	return stack.PacketTooBigTransportError
   121  }
   122  
   123  func (e *endpoint) checkLocalAddress(addr tcpip.Address) bool {
   124  	if e.nic.Spoofing() {
   125  		return true
   126  	}
   127  
   128  	if addressEndpoint := e.AcquireAssignedAddress(addr, false, stack.NeverPrimaryEndpoint); addressEndpoint != nil {
   129  		addressEndpoint.DecRef()
   130  		return true
   131  	}
   132  	return false
   133  }
   134  
   135  // handleControl handles the case when an ICMP error packet contains the headers
   136  // of the original packet that caused the ICMP one to be sent. This information
   137  // is used to find out which transport endpoint must be notified about the ICMP
   138  // packet. We only expect the payload, not the enclosing ICMP packet.
   139  func (e *endpoint) handleControl(errInfo stack.TransportError, pkt *stack.PacketBuffer) {
   140  	h, ok := pkt.Data().PullUp(header.IPv4MinimumSize)
   141  	if !ok {
   142  		return
   143  	}
   144  	hdr := header.IPv4(h)
   145  
   146  	// We don't use IsValid() here because ICMP only requires that the IP
   147  	// header plus 8 bytes of the transport header be included. So it's
   148  	// likely that it is truncated, which would cause IsValid to return
   149  	// false.
   150  	//
   151  	// Drop packet if it doesn't have the basic IPv4 header or if the
   152  	// original source address doesn't match an address we own.
   153  	srcAddr := hdr.SourceAddress()
   154  	if !e.checkLocalAddress(srcAddr) {
   155  		return
   156  	}
   157  
   158  	hlen := int(hdr.HeaderLength())
   159  	if pkt.Data().Size() < hlen || hdr.FragmentOffset() != 0 {
   160  		// We won't be able to handle this if it doesn't contain the
   161  		// full IPv4 header, or if it's a fragment not at offset 0
   162  		// (because it won't have the transport header).
   163  		return
   164  	}
   165  
   166  	// Keep needed information before trimming header.
   167  	p := hdr.TransportProtocol()
   168  	dstAddr := hdr.DestinationAddress()
   169  	// Skip the ip header, then deliver the error.
   170  	pkt.Data().DeleteFront(hlen)
   171  	e.dispatcher.DeliverTransportError(srcAddr, dstAddr, ProtocolNumber, p, errInfo, pkt)
   172  }
   173  
   174  func (e *endpoint) handleICMP(pkt *stack.PacketBuffer) {
   175  	received := e.stats.icmp.packetsReceived
   176  	// ICMP packets don't have their TransportHeader fields set. See
   177  	// icmp/protocol.go:protocol.Parse for a full explanation.
   178  	v, ok := pkt.Data().PullUp(header.ICMPv4MinimumSize)
   179  	if !ok {
   180  		received.invalid.Increment()
   181  		return
   182  	}
   183  	h := header.ICMPv4(v)
   184  
   185  	// Only do in-stack processing if the checksum is correct.
   186  	if pkt.Data().AsRange().Checksum() != 0xffff {
   187  		received.invalid.Increment()
   188  		// It's possible that a raw socket expects to receive this regardless
   189  		// of checksum errors. If it's an echo request we know it's safe because
   190  		// we are the only handler, however other types do not cope well with
   191  		// packets with checksum errors.
   192  		switch h.Type() {
   193  		case header.ICMPv4Echo:
   194  			e.dispatcher.DeliverTransportPacket(header.ICMPv4ProtocolNumber, pkt)
   195  		}
   196  		return
   197  	}
   198  
   199  	iph := header.IPv4(pkt.NetworkHeader().View())
   200  	var newOptions header.IPv4Options
   201  	if opts := iph.Options(); len(opts) != 0 {
   202  		// RFC 1122 section 3.2.2.6 (page 43) (and similar for other round trip
   203  		// type ICMP packets):
   204  		//    If a Record Route and/or Time Stamp option is received in an
   205  		//    ICMP Echo Request, this option (these options) SHOULD be
   206  		//    updated to include the current host and included in the IP
   207  		//    header of the Echo Reply message, without "truncation".
   208  		//    Thus, the recorded route will be for the entire round trip.
   209  		//
   210  		// So we need to let the option processor know how it should handle them.
   211  		var op optionsUsage
   212  		if h.Type() == header.ICMPv4Echo {
   213  			op = &optionUsageEcho{}
   214  		} else {
   215  			op = &optionUsageReceive{}
   216  		}
   217  		var optProblem *header.IPv4OptParameterProblem
   218  		newOptions, _, optProblem = e.processIPOptions(pkt, opts, op)
   219  		if optProblem != nil {
   220  			if optProblem.NeedICMP {
   221  				_ = e.protocol.returnError(&icmpReasonParamProblem{
   222  					pointer: optProblem.Pointer,
   223  				}, pkt)
   224  				e.stats.ip.MalformedPacketsReceived.Increment()
   225  			}
   226  			return
   227  		}
   228  		copied := copy(opts, newOptions)
   229  		if copied != len(newOptions) {
   230  			panic(fmt.Sprintf("copied %d bytes of new options, expected %d bytes", copied, len(newOptions)))
   231  		}
   232  		for i := copied; i < len(opts); i++ {
   233  			// Pad with 0 (EOL). RFC 791 page 23 says "The padding is zero".
   234  			opts[i] = byte(header.IPv4OptionListEndType)
   235  		}
   236  	}
   237  
   238  	// TODO(b/112892170): Meaningfully handle all ICMP types.
   239  	switch h.Type() {
   240  	case header.ICMPv4Echo:
   241  		received.echoRequest.Increment()
   242  
   243  		sent := e.stats.icmp.packetsSent
   244  		if !e.protocol.stack.AllowICMPMessage() {
   245  			sent.rateLimited.Increment()
   246  			return
   247  		}
   248  
   249  		// DeliverTransportPacket will take ownership of pkt so don't use it beyond
   250  		// this point. Make a deep copy of the data before pkt gets sent as we will
   251  		// be modifying fields.
   252  		//
   253  		// TODO(github.com/SagerNet/issue/4399): The copy may not be needed if there are no
   254  		// waiting endpoints. Consider moving responsibility for doing the copy to
   255  		// DeliverTransportPacket so that is is only done when needed.
   256  		replyData := pkt.Data().AsRange().ToOwnedView()
   257  		ipHdr := header.IPv4(pkt.NetworkHeader().View())
   258  		localAddressBroadcast := pkt.NetworkPacketInfo.LocalAddressBroadcast
   259  
   260  		// It's possible that a raw socket expects to receive this.
   261  		e.dispatcher.DeliverTransportPacket(header.ICMPv4ProtocolNumber, pkt)
   262  		pkt = nil
   263  
   264  		// Take the base of the incoming request IP header but replace the options.
   265  		replyHeaderLength := uint8(header.IPv4MinimumSize + len(newOptions))
   266  		replyIPHdr := header.IPv4(append(iph[:header.IPv4MinimumSize:header.IPv4MinimumSize], newOptions...))
   267  		replyIPHdr.SetHeaderLength(replyHeaderLength)
   268  
   269  		// As per RFC 1122 section 3.2.1.3, when a host sends any datagram, the IP
   270  		// source address MUST be one of its own IP addresses (but not a broadcast
   271  		// or multicast address).
   272  		localAddr := ipHdr.DestinationAddress()
   273  		if localAddressBroadcast || header.IsV4MulticastAddress(localAddr) {
   274  			localAddr = ""
   275  		}
   276  
   277  		r, err := e.protocol.stack.FindRoute(e.nic.ID(), localAddr, ipHdr.SourceAddress(), ProtocolNumber, false /* multicastLoop */)
   278  		if err != nil {
   279  			// If we cannot find a route to the destination, silently drop the packet.
   280  			return
   281  		}
   282  		defer r.Release()
   283  
   284  		// TODO(github.com/SagerNet/issue/3810:) When adding protocol numbers into the
   285  		// header information, we may have to change this code to handle the
   286  		// ICMP header no longer being in the data buffer.
   287  
   288  		// Because IP and ICMP are so closely intertwined, we need to handcraft our
   289  		// IP header to be able to follow RFC 792. The wording on page 13 is as
   290  		// follows:
   291  		//   IP Fields:
   292  		//   Addresses
   293  		//     The address of the source in an echo message will be the
   294  		//     destination of the echo reply message.  To form an echo reply
   295  		//     message, the source and destination addresses are simply reversed,
   296  		//     the type code changed to 0, and the checksum recomputed.
   297  		//
   298  		// This was interpreted by early implementors to mean that all options must
   299  		// be copied from the echo request IP header to the echo reply IP header
   300  		// and this behaviour is still relied upon by some applications.
   301  		//
   302  		// Create a copy of the IP header we received, options and all, and change
   303  		// The fields we need to alter.
   304  		//
   305  		// We need to produce the entire packet in the data segment in order to
   306  		// use WriteHeaderIncludedPacket(). WriteHeaderIncludedPacket sets the
   307  		// total length and the header checksum so we don't need to set those here.
   308  		replyIPHdr.SetSourceAddress(r.LocalAddress())
   309  		replyIPHdr.SetDestinationAddress(r.RemoteAddress())
   310  		replyIPHdr.SetTTL(r.DefaultTTL())
   311  
   312  		replyICMPHdr := header.ICMPv4(replyData)
   313  		replyICMPHdr.SetType(header.ICMPv4EchoReply)
   314  		replyICMPHdr.SetChecksum(0)
   315  		replyICMPHdr.SetChecksum(^header.Checksum(replyData, 0))
   316  
   317  		replyVV := buffer.View(replyIPHdr).ToVectorisedView()
   318  		replyVV.AppendView(replyData)
   319  		replyPkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
   320  			ReserveHeaderBytes: int(r.MaxHeaderLength()),
   321  			Data:               replyVV,
   322  		})
   323  		replyPkt.TransportProtocolNumber = header.ICMPv4ProtocolNumber
   324  
   325  		if err := r.WriteHeaderIncludedPacket(replyPkt); err != nil {
   326  			sent.dropped.Increment()
   327  			return
   328  		}
   329  		sent.echoReply.Increment()
   330  
   331  	case header.ICMPv4EchoReply:
   332  		received.echoReply.Increment()
   333  
   334  		e.dispatcher.DeliverTransportPacket(header.ICMPv4ProtocolNumber, pkt)
   335  
   336  	case header.ICMPv4DstUnreachable:
   337  		received.dstUnreachable.Increment()
   338  
   339  		mtu := h.MTU()
   340  		code := h.Code()
   341  		pkt.Data().DeleteFront(header.ICMPv4MinimumSize)
   342  		switch code {
   343  		case header.ICMPv4HostUnreachable:
   344  			e.handleControl(&icmpv4DestinationHostUnreachableSockError{}, pkt)
   345  		case header.ICMPv4PortUnreachable:
   346  			e.handleControl(&icmpv4DestinationPortUnreachableSockError{}, pkt)
   347  		case header.ICMPv4FragmentationNeeded:
   348  			networkMTU, err := calculateNetworkMTU(uint32(mtu), header.IPv4MinimumSize)
   349  			if err != nil {
   350  				networkMTU = 0
   351  			}
   352  			e.handleControl(&icmpv4FragmentationNeededSockError{mtu: networkMTU}, pkt)
   353  		}
   354  	case header.ICMPv4SrcQuench:
   355  		received.srcQuench.Increment()
   356  
   357  	case header.ICMPv4Redirect:
   358  		received.redirect.Increment()
   359  
   360  	case header.ICMPv4TimeExceeded:
   361  		received.timeExceeded.Increment()
   362  
   363  	case header.ICMPv4ParamProblem:
   364  		received.paramProblem.Increment()
   365  
   366  	case header.ICMPv4Timestamp:
   367  		received.timestamp.Increment()
   368  
   369  	case header.ICMPv4TimestampReply:
   370  		received.timestampReply.Increment()
   371  
   372  	case header.ICMPv4InfoRequest:
   373  		received.infoRequest.Increment()
   374  
   375  	case header.ICMPv4InfoReply:
   376  		received.infoReply.Increment()
   377  
   378  	default:
   379  		received.invalid.Increment()
   380  	}
   381  }
   382  
   383  // ======= ICMP Error packet generation =========
   384  
   385  // icmpReason is a marker interface for IPv4 specific ICMP errors.
   386  type icmpReason interface {
   387  	isICMPReason()
   388  	// isForwarding indicates whether or not the error arose while attempting to
   389  	// forward a packet.
   390  	isForwarding() bool
   391  }
   392  
   393  // icmpReasonPortUnreachable is an error where the transport protocol has no
   394  // listener and no alternative means to inform the sender.
   395  type icmpReasonPortUnreachable struct{}
   396  
   397  func (*icmpReasonPortUnreachable) isICMPReason() {}
   398  func (*icmpReasonPortUnreachable) isForwarding() bool {
   399  	return false
   400  }
   401  
   402  // icmpReasonProtoUnreachable is an error where the transport protocol is
   403  // not supported.
   404  type icmpReasonProtoUnreachable struct{}
   405  
   406  func (*icmpReasonProtoUnreachable) isICMPReason() {}
   407  func (*icmpReasonProtoUnreachable) isForwarding() bool {
   408  	return false
   409  }
   410  
   411  // icmpReasonTTLExceeded is an error where a packet's time to live exceeded in
   412  // transit to its final destination, as per RFC 792 page 6, Time Exceeded
   413  // Message.
   414  type icmpReasonTTLExceeded struct{}
   415  
   416  func (*icmpReasonTTLExceeded) isICMPReason() {}
   417  func (*icmpReasonTTLExceeded) isForwarding() bool {
   418  	// If we hit a TTL Exceeded error, then we know we are operating as a router.
   419  	// As per RFC 792 page 6, Time Exceeded Message,
   420  	//
   421  	//   If the gateway processing a datagram finds the time to live field
   422  	//   is zero it must discard the datagram.  The gateway may also notify
   423  	//   the source host via the time exceeded message.
   424  	return true
   425  }
   426  
   427  // icmpReasonReassemblyTimeout is an error where insufficient fragments are
   428  // received to complete reassembly of a packet within a configured time after
   429  // the reception of the first-arriving fragment of that packet.
   430  type icmpReasonReassemblyTimeout struct{}
   431  
   432  func (*icmpReasonReassemblyTimeout) isICMPReason() {}
   433  func (*icmpReasonReassemblyTimeout) isForwarding() bool {
   434  	return false
   435  }
   436  
   437  // icmpReasonParamProblem is an error to use to request a Parameter Problem
   438  // message to be sent.
   439  type icmpReasonParamProblem struct {
   440  	pointer    byte
   441  	forwarding bool
   442  }
   443  
   444  func (*icmpReasonParamProblem) isICMPReason() {}
   445  func (r *icmpReasonParamProblem) isForwarding() bool {
   446  	return r.forwarding
   447  }
   448  
   449  // icmpReasonNetworkUnreachable is an error in which the network specified in
   450  // the internet destination field of the datagram is unreachable.
   451  type icmpReasonNetworkUnreachable struct{}
   452  
   453  func (*icmpReasonNetworkUnreachable) isICMPReason() {}
   454  func (*icmpReasonNetworkUnreachable) isForwarding() bool {
   455  	// If we hit a Net Unreachable error, then we know we are operating as
   456  	// a router. As per RFC 792 page 5, Destination Unreachable Message,
   457  	//
   458  	//  If, according to the information in the gateway's routing tables,
   459  	//  the network specified in the internet destination field of a
   460  	//  datagram is unreachable, e.g., the distance to the network is
   461  	//  infinity, the gateway may send a destination unreachable message to
   462  	//  the internet source host of the datagram.
   463  	return true
   464  }
   465  
   466  // icmpReasonFragmentationNeeded is an error where a packet requires
   467  // fragmentation while also having the Don't Fragment flag set, as per RFC 792
   468  // page 3, Destination Unreachable Message.
   469  type icmpReasonFragmentationNeeded struct{}
   470  
   471  func (*icmpReasonFragmentationNeeded) isICMPReason() {}
   472  func (*icmpReasonFragmentationNeeded) isForwarding() bool {
   473  	// If we hit a Don't Fragment error, then we know we are operating as a router.
   474  	// As per RFC 792 page 4, Destination Unreachable Message,
   475  	//
   476  	//   Another case is when a datagram must be fragmented to be forwarded by a
   477  	//   gateway yet the Don't Fragment flag is on. In this case the gateway must
   478  	//   discard the datagram and may return a destination unreachable message.
   479  	return true
   480  }
   481  
   482  // icmpReasonHostUnreachable is an error in which the host specified in the
   483  // internet destination field of the datagram is unreachable.
   484  type icmpReasonHostUnreachable struct{}
   485  
   486  func (*icmpReasonHostUnreachable) isICMPReason() {}
   487  func (*icmpReasonHostUnreachable) isForwarding() bool {
   488  	// If we hit a Host Unreachable error, then we know we are operating as a
   489  	// router. As per RFC 792 page 5, Destination Unreachable Message,
   490  	//
   491  	//   In addition, in some networks, the gateway may be able to determine
   492  	//   if the internet destination host is unreachable.  Gateways in these
   493  	//   networks may send destination unreachable messages to the source host
   494  	//   when the destination host is unreachable.
   495  	return true
   496  }
   497  
// returnError takes an error descriptor and generates the appropriate ICMP
// error packet for IPv4 and sends it back to the remote device that sent
// the problematic packet. It incorporates as much of that packet as
// possible as well as any error metadata as is available. returnError
// expects pkt to hold a valid IPv4 packet as per the wire format.
//
// It returns nil both when the error message was sent and when sending was
// deliberately suppressed (broadcast/multicast destination, unspecified
// source, rate limiting, the offending packet being a possible ICMP error
// itself, or too little room for the minimum payload). A non-nil error is
// returned when no route to the sender can be found, when no endpoint is
// registered for the route's NIC, or when writing the packet fails.
func (p *protocol) returnError(reason icmpReason, pkt *stack.PacketBuffer) tcpip.Error {
	origIPHdr := header.IPv4(pkt.NetworkHeader().View())
	origIPHdrSrc := origIPHdr.SourceAddress()
	origIPHdrDst := origIPHdr.DestinationAddress()

	// We check we are responding only when we are allowed to.
	// See RFC 1812 section 4.3.2.7 (shown below).
	//
	// =========
	// 4.3.2.7 When Not to Send ICMP Errors
	//
	//  An ICMP error message MUST NOT be sent as the result of receiving:
	//
	//  o An ICMP error message, or
	//
	//  o A packet which fails the IP header validation tests described in
	//    Section [5.2.2] (except where that section specifically permits
	//    the sending of an ICMP error message), or
	//
	//  o A packet destined to an IP broadcast or IP multicast address, or
	//
	//  o A packet sent as a Link Layer broadcast or multicast, or
	//
	//  o Any fragment of a datagram other then the first fragment (i.e., a
	// packet for which the fragment offset in the IP header is nonzero).
	//
	// TODO(github.com/SagerNet/issues/4058): Make sure we don't send ICMP errors in
	// response to a non-initial fragment, but it currently can not happen.
	if pkt.NetworkPacketInfo.LocalAddressBroadcast || header.IsV4MulticastAddress(origIPHdrDst) || origIPHdrSrc == header.IPv4Any {
		return nil
	}

	// If we are operating as a router/gateway, don't use the packet's destination
	// address as the response's source address as we should not own the
	// destination address of a packet we are forwarding.
	localAddr := origIPHdrDst
	if reason.isForwarding() {
		// An empty local address leaves the choice of source address to
		// FindRoute.
		localAddr = ""
	}

	// Even if we were able to receive a packet from some remote, we may not have
	// a route to it - the remote may be blocked via routing rules. We must always
	// consult our routing table and find a route to the remote before sending any
	// packet.
	route, err := p.stack.FindRoute(pkt.NICID, localAddr, origIPHdrSrc, ProtocolNumber, false /* multicastLoop */)
	if err != nil {
		return err
	}
	defer route.Release()

	p.mu.Lock()
	// We retrieve an endpoint using the newly constructed route's NICID rather
	// than the packet's NICID. The packet's NICID corresponds to the NIC on
	// which it arrived, which isn't necessarily the same as the NIC on which it
	// will be transmitted. On the other hand, the route's NIC *is* guaranteed
	// to be the NIC on which the packet will be transmitted.
	netEP, ok := p.mu.eps[route.NICID()]
	p.mu.Unlock()
	if !ok {
		return &tcpip.ErrNotConnected{}
	}

	// Sent-side ICMP statistics of the endpoint the error will leave from.
	sent := netEP.stats.icmp.packetsSent

	if !p.stack.AllowICMPMessage() {
		sent.rateLimited.Increment()
		return nil
	}

	transportHeader := pkt.TransportHeader().View()

	// Don't respond to icmp error packets.
	if origIPHdr.Protocol() == uint8(header.ICMPv4ProtocolNumber) {
		// TODO(github.com/SagerNet/issue/3810):
		// Unfortunately the current stack pretty much always has ICMPv4 headers
		// in the Data section of the packet but there is no guarantee that is the
		// case. If this is the case grab the header to make it like all other
		// packet types. When this is cleaned up the Consume should be removed.
		if transportHeader.IsEmpty() {
			var ok bool
			transportHeader, ok = pkt.TransportHeader().Consume(header.ICMPv4MinimumSize)
			if !ok {
				return nil
			}
		} else if transportHeader.Size() < header.ICMPv4MinimumSize {
			return nil
		}
		// We need to decide to explicitly name the packets we can respond to or
		// the ones we can not respond to. The decision is somewhat arbitrary and
		// if problems arise this could be reversed. It was judged less of a breach
		// of protocol to not respond to unknown non-error packets than to respond
		// to unknown error packets so we take the first approach.
		switch header.ICMPv4(transportHeader).Type() {
		case
			header.ICMPv4EchoReply,
			header.ICMPv4Echo,
			header.ICMPv4Timestamp,
			header.ICMPv4TimestampReply,
			header.ICMPv4InfoRequest,
			header.ICMPv4InfoReply:
			// Known non-error types: fall through and send the error.
		default:
			// Assume any type we don't know about may be an error type.
			return nil
		}
	}

	// Now work out how much of the triggering packet we should return.
	// As per RFC 1812 Section 4.3.2.3
	//
	//   ICMP datagram SHOULD contain as much of the original
	//   datagram as possible without the length of the ICMP
	//   datagram exceeding 576 bytes.
	//
	// NOTE: The above RFC referenced is different from the original
	// recommendation in RFC 1122 and RFC 792 where it mentioned that at
	// least 8 bytes of the payload must be included. Today linux and other
	// systems implement the RFC 1812 definition and not the original
	// requirement. We treat 8 bytes as the minimum but will try send more.
	mtu := int(route.MTU())
	const maxIPData = header.IPv4MinimumProcessableDatagramSize - header.IPv4MinimumSize
	if mtu > maxIPData {
		mtu = maxIPData
	}
	// Room left for the quoted packet once the ICMP header is accounted for.
	available := mtu - header.ICMPv4MinimumSize

	// Give up if the original IP header plus the minimum error payload
	// cannot fit.
	if available < len(origIPHdr)+header.ICMPv4MinimumErrorPayloadSize {
		return nil
	}

	payloadLen := len(origIPHdr) + transportHeader.Size() + pkt.Data().Size()
	if payloadLen > available {
		payloadLen = available
	}

	// The buffers used by pkt may be used elsewhere in the system.
	// For example, an AF_RAW or AF_PACKET socket may use what the transport
	// protocol considers an unreachable destination. Thus we deep copy pkt to
	// prevent multiple ownership and SR errors. The new copy is a vectorized
	// view with the entire incoming IP packet reassembled and truncated as
	// required. This is now the payload of the new ICMP packet and no longer
	// considered a packet in its own right.
	newHeader := append(buffer.View(nil), origIPHdr...)
	newHeader = append(newHeader, transportHeader...)
	payload := newHeader.ToVectorisedView()
	if dataCap := payloadLen - payload.Size(); dataCap > 0 {
		// There is room beyond the copied headers: append a capped copy of
		// the data.
		payload.AppendView(pkt.Data().AsRange().Capped(dataCap).ToOwnedView())
	} else {
		// The headers alone fill (or overflow) the budget: truncate them.
		payload.CapLength(payloadLen)
	}

	icmpPkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
		ReserveHeaderBytes: int(route.MaxHeaderLength()) + header.ICMPv4MinimumSize,
		Data:               payload,
	})

	icmpPkt.TransportProtocolNumber = header.ICMPv4ProtocolNumber

	icmpHdr := header.ICMPv4(icmpPkt.TransportHeader().Push(header.ICMPv4MinimumSize))
	// counter is the sent-statistic matching the message type; it is only
	// incremented once the packet has been written successfully.
	var counter tcpip.MultiCounterStat
	switch reason := reason.(type) {
	case *icmpReasonPortUnreachable:
		icmpHdr.SetType(header.ICMPv4DstUnreachable)
		icmpHdr.SetCode(header.ICMPv4PortUnreachable)
		counter = sent.dstUnreachable
	case *icmpReasonProtoUnreachable:
		icmpHdr.SetType(header.ICMPv4DstUnreachable)
		icmpHdr.SetCode(header.ICMPv4ProtoUnreachable)
		counter = sent.dstUnreachable
	case *icmpReasonNetworkUnreachable:
		icmpHdr.SetType(header.ICMPv4DstUnreachable)
		icmpHdr.SetCode(header.ICMPv4NetUnreachable)
		counter = sent.dstUnreachable
	case *icmpReasonHostUnreachable:
		icmpHdr.SetType(header.ICMPv4DstUnreachable)
		icmpHdr.SetCode(header.ICMPv4HostUnreachable)
		counter = sent.dstUnreachable
	case *icmpReasonFragmentationNeeded:
		icmpHdr.SetType(header.ICMPv4DstUnreachable)
		icmpHdr.SetCode(header.ICMPv4FragmentationNeeded)
		counter = sent.dstUnreachable
	case *icmpReasonTTLExceeded:
		icmpHdr.SetType(header.ICMPv4TimeExceeded)
		icmpHdr.SetCode(header.ICMPv4TTLExceeded)
		counter = sent.timeExceeded
	case *icmpReasonReassemblyTimeout:
		icmpHdr.SetType(header.ICMPv4TimeExceeded)
		icmpHdr.SetCode(header.ICMPv4ReassemblyTimeout)
		counter = sent.timeExceeded
	case *icmpReasonParamProblem:
		icmpHdr.SetType(header.ICMPv4ParamProblem)
		icmpHdr.SetCode(header.ICMPv4UnusedCode)
		icmpHdr.SetPointer(reason.pointer)
		counter = sent.paramProblem
	default:
		panic(fmt.Sprintf("unsupported ICMP type %T", reason))
	}
	icmpHdr.SetChecksum(header.ICMPv4Checksum(icmpHdr, icmpPkt.Data().AsRange().Checksum()))

	if err := route.WritePacket(
		stack.NetworkHeaderParams{
			Protocol: header.ICMPv4ProtocolNumber,
			TTL:      route.DefaultTTL(),
			TOS:      stack.DefaultTOS,
		},
		icmpPkt,
	); err != nil {
		sent.dropped.Increment()
		return err
	}
	counter.Increment()
	return nil
}
   715  
   716  // OnReassemblyTimeout implements fragmentation.TimeoutHandler.
   717  func (p *protocol) OnReassemblyTimeout(pkt *stack.PacketBuffer) {
   718  	// OnReassemblyTimeout sends a Time Exceeded Message, as per RFC 792:
   719  	//
   720  	//   If a host reassembling a fragmented datagram cannot complete the
   721  	//   reassembly due to missing fragments within its time limit it discards the
   722  	//   datagram, and it may send a time exceeded message.
   723  	//
   724  	//   If fragment zero is not available then no time exceeded need be sent at
   725  	//   all.
   726  	if pkt != nil {
   727  		p.returnError(&icmpReasonReassemblyTimeout{}, pkt)
   728  	}
   729  }