github.com/psiphon-labs/psiphon-tunnel-core@v2.0.28+incompatible/psiphon/common/packetman/packetman_linux.go (about)

     1  /*
     2   * Copyright (c) 2020, Psiphon Inc.
     3   * All rights reserved.
     4   *
     5   * This program is free software: you can redistribute it and/or modify
     6   * it under the terms of the GNU General Public License as published by
     7   * the Free Software Foundation, either version 3 of the License, or
     8   * (at your option) any later version.
     9   *
    10   * This program is distributed in the hope that it will be useful,
    11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
    12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13   * GNU General Public License for more details.
    14   *
    15   * You should have received a copy of the GNU General Public License
    16   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17   *
    18   */
    19  
    20  package packetman
    21  
    22  import (
    23  	"context"
    24  	"encoding/binary"
    25  	"log"
    26  	"net"
    27  	"strconv"
    28  	"strings"
    29  	"sync"
    30  	"syscall"
    31  	"time"
    32  
    33  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
    34  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
    35  	"github.com/florianl/go-nfqueue"
    36  	"github.com/google/gopacket"
    37  	"github.com/google/gopacket/layers"
    38  	cache "github.com/patrickmn/go-cache"
    39  )
    40  
    41  func IsSupported() bool {
    42  	return true
    43  }
    44  
    45  const (
    46  	defaultSocketMark   = 0x70736970 // "PSIP"
    47  	appliedSpecCacheTTL = 1 * time.Minute
    48  )
    49  
    50  // Manipulator is a SYN-ACK packet manipulator.
    51  //
    52  // NFQUEUE/Netlink is used to intercept SYN-ACK packets, on all local
    53  // interfaces, with source port equal to one of the ProtocolPorts specified in
    54  // Config. For each intercepted SYN-ACK packet, the SelectSpecName callback in
    55  // Config is invoked; the callback determines which packet transformation spec
    56  // to apply, based on, for example, client GeoIP, protocol, or other
    57  // considerations.
    58  //
    59  // Protocol network listeners use GetAppliedSpecName to determine which
    60  // transformation spec was applied to a given accepted connection.
    61  //
     62  // When manipulations are to be applied to a SYN-ACK packet, NFQUEUE is
    63  // instructed to drop the packet and one or more new packets, created by
    64  // applying transformations to the original SYN-ACK packet, are injected via
    65  // raw sockets. Raw sockets are used as NFQUEUE supports only replacing the
    66  // original packet with one alternative packet.
    67  //
    68  // To avoid an intercept loop, injected packets are marked (SO_MARK) and the
    69  // filter for NFQUEUE excludes packets with this mark.
    70  //
    71  // To avoid breaking TCP in unexpected cases, Manipulator fails open --
    72  // allowing the original packet to proceed -- when packet parsing fails. For
    73  // the same reason, the queue-bypass NFQUEUE option is set.
    74  //
    75  // As an iptables filter ensures only SYN-ACK packets are sent to the
    76  // NFQUEUEs, the overhead of packet interception, parsing, and injection is
    77  // incurred no more than once per TCP connection.
    78  //
    79  // NFQUEUE with queue-bypass requires Linux kernel 2.6.39; 3.16 or later is
    80  // validated and recommended.
    81  //
    82  // Due to use of NFQUEUE, larger than max socket buffer sizes, and raw
    83  // sockets, Manipulator requires CAP_NET_ADMIN and CAP_NET_RAW.
    84  type Manipulator struct {
    85  	config             *Config
    86  	mutex              sync.Mutex
    87  	runContext         context.Context
    88  	stopRunning        context.CancelFunc
    89  	waitGroup          *sync.WaitGroup
    90  	injectIPv4FD       int
    91  	injectIPv6FD       int
    92  	nfqueue            *nfqueue.Nfqueue
    93  	compiledSpecsMutex sync.Mutex
    94  	compiledSpecs      map[string]*compiledSpec
    95  	appliedSpecCache   *cache.Cache
    96  }
    97  
    98  // NewManipulator creates a new Manipulator.
    99  func NewManipulator(config *Config) (*Manipulator, error) {
   100  
   101  	m := &Manipulator{
   102  		config: config,
   103  	}
   104  
   105  	err := m.SetSpecs(config.Specs)
   106  	if err != nil {
   107  		return nil, errors.Trace(err)
   108  	}
   109  
   110  	// To avoid memory exhaustion, do not retain unconsumed appliedSpecCache
   111  	// entries for a longer time than it may reasonably take to complete the TCP
   112  	// handshake.
   113  	m.appliedSpecCache = cache.New(appliedSpecCacheTTL, appliedSpecCacheTTL/2)
   114  
   115  	return m, nil
   116  }
   117  
   118  // Start initializes NFQUEUEs and raw sockets for packet manipulation. Start
   119  // returns when initialization is complete; once it returns, the caller may
   120  // assume that any SYN-ACK packets on configured ports will be intercepted. In
   121  // the case of initialization failure, Start will undo any partial
   122  // initialization. When Start succeeds, the caller must call Stop to free
   123  // resources and restore networking state.
   124  func (m *Manipulator) Start() (retErr error) {
   125  
   126  	m.mutex.Lock()
   127  	defer m.mutex.Unlock()
   128  
   129  	if m.runContext != nil {
   130  		return errors.TraceNew("already running")
   131  	}
   132  
   133  	if len(m.config.ProtocolPorts) == 0 {
   134  		// There are no ports to intercept, so there is nothing to run. Skip
   135  		// subsequent operations which assume at least one intercept port is
   136  		// configured. This is a success case, and a subseqent call to Stop is a
   137  		// no-op.
   138  		return nil
   139  	}
   140  
   141  	err := m.configureIPTables(true)
   142  	if err != nil {
   143  		return errors.Trace(err)
   144  	}
   145  	defer func() {
   146  		if retErr != nil {
   147  			m.configureIPTables(false)
   148  		}
   149  	}()
   150  
   151  	m.injectIPv4FD, err = syscall.Socket(syscall.AF_INET, syscall.SOCK_RAW, syscall.IPPROTO_RAW)
   152  	if err != nil {
   153  		return errors.Trace(err)
   154  	}
   155  	defer func() {
   156  		if retErr != nil {
   157  			syscall.Close(m.injectIPv4FD)
   158  		}
   159  	}()
   160  
   161  	err = syscall.SetsockoptInt(m.injectIPv4FD, syscall.IPPROTO_IP, syscall.IP_HDRINCL, 1)
   162  	if err != nil {
   163  		return errors.Trace(err)
   164  	}
   165  
   166  	err = syscall.SetsockoptInt(m.injectIPv4FD, syscall.SOL_SOCKET, syscall.SO_MARK, m.getSocketMark())
   167  	if err != nil {
   168  		return errors.Trace(err)
   169  	}
   170  
   171  	m.injectIPv6FD, err = syscall.Socket(syscall.AF_INET6, syscall.SOCK_RAW, syscall.IPPROTO_RAW)
   172  	if err != nil && !m.config.AllowNoIPv6NetworkConfiguration {
   173  		return errors.Trace(err)
   174  	}
   175  	defer func() {
   176  		if retErr != nil {
   177  			syscall.Close(m.injectIPv6FD)
   178  		}
   179  	}()
   180  
   181  	if m.injectIPv6FD != 0 {
   182  		err = syscall.SetsockoptInt(m.injectIPv6FD, syscall.IPPROTO_IPV6, syscall.IP_HDRINCL, 1)
   183  		if err != nil {
   184  			// There's no AllowNoIPv6NetworkConfiguration in this case: if we can
   185  			// create an IPv6 socket, we must be able to set its options.
   186  			return errors.Trace(err)
   187  		}
   188  
   189  		err = syscall.SetsockoptInt(m.injectIPv6FD, syscall.SOL_SOCKET, syscall.SO_MARK, m.getSocketMark())
   190  		if err != nil {
   191  			return errors.Trace(err)
   192  		}
   193  	}
   194  
   195  	// Use a reasonable buffer size to avoid excess allocation. As we're
   196  	// intercepting only locally generated SYN-ACK packets, which should have no
   197  	// payload, this size should be more than sufficient.
   198  	maxPacketLen := uint32(1500)
   199  
   200  	// The kernel default is 1024:
   201  	// https://github.com/torvalds/linux/blob/cd8dead0c39457e58ec1d36db93aedca811d48f1/net/netfilter/nfnetlink_queue.c#L51,
   202  	// via https://github.com/florianl/go-nfqueue/issues/3.
   203  	// We use a larger queue size to accomodate more concurrent SYN-ACK packets.
   204  	maxQueueLen := uint32(2048)
   205  
   206  	// Timeout note: on a small subset of production servers, we have found that
   207  	// setting a non-zero read timeout results in occasional "orphaned" packets
   208  	// which remain in the queue but are not delivered to handleInterceptedPacket
   209  	// for a verdict. This phenomenon leads to a stall in nfqueue processing once
   210  	// the queue fills up with packets apparently awaiting a verdict. The shorter
   211  	// the timeout, the faster that orphaned packets accumulate. With no timeout,
   212  	// and reads in blocking mode, this phenomenon does not occur.
   213  
   214  	m.nfqueue, err = nfqueue.Open(
   215  		&nfqueue.Config{
   216  			NfQueue:      uint16(m.config.QueueNumber),
   217  			MaxPacketLen: maxPacketLen,
   218  			MaxQueueLen:  maxQueueLen,
   219  			Copymode:     nfqueue.NfQnlCopyPacket,
   220  			Logger:       newNfqueueLogger(m.config.Logger),
   221  			ReadTimeout:  0,
   222  			WriteTimeout: 0,
   223  		})
   224  	if err != nil {
   225  		return errors.Trace(err)
   226  	}
   227  	defer func() {
   228  		if retErr != nil {
   229  			m.nfqueue.Close()
   230  		}
   231  	}()
   232  
   233  	// Set a netlink socket receive buffer size that is significantly larger than
   234  	// the typical default of 212992. This avoids ENOBUFS in the case of many
   235  	// netlink messages from the kernel (capped by the max queue size). Note that
   236  	// the CAP_NET_ADMIN may be required when this exceeds the configured max
   237  	// buffer size.
   238  	err = m.nfqueue.Con.SetReadBuffer(1703936)
   239  	if err != nil {
   240  		return errors.Trace(err)
   241  	}
   242  
   243  	runContext, stopRunning := context.WithCancel(context.Background())
   244  	defer func() {
   245  		if retErr != nil {
   246  			stopRunning()
   247  		}
   248  	}()
   249  
   250  	err = m.nfqueue.Register(runContext, m.handleInterceptedPacket)
   251  	if err != nil {
   252  		return errors.Trace(err)
   253  	}
   254  
   255  	m.runContext = runContext
   256  	m.stopRunning = stopRunning
   257  
   258  	return nil
   259  }
   260  
   261  // Stop halts packet manipulation, frees resources, and restores networking
   262  // state.
   263  func (m *Manipulator) Stop() {
   264  
   265  	m.mutex.Lock()
   266  	defer m.mutex.Unlock()
   267  
   268  	if m.runContext == nil {
   269  		return
   270  	}
   271  
   272  	// Call stopRunning before interrupting the blocked read; this ensures that
   273  	// the nfqueue socketCallback loop will exit after the read is interrupted.
   274  	m.stopRunning()
   275  
   276  	// Interrupt a blocked read.
   277  	m.nfqueue.Con.SetDeadline(time.Unix(0, 1))
   278  
   279  	// There's no socketCallback exit synchronization exposed by nfqueue. Calling
   280  	// nfqueue.Close while socketCallback is still running can result in errors
   281  	// such as "nfqueuenfqueue_gteq_1.12.go:134: Could not unbind from queue:
   282  	// netlink send: sendmsg: bad file descriptor"; and closing the raw socket
   283  	// file descriptors while socketCallback is still running can result in
   284  	// errors such as "packetman.(*Manipulator).injectPackets#604: bad file
   285  	// descriptor".
   286  	//
   287  	// Attempt to avoid invalid file descriptor operations and spurious error
   288  	// messages by sleeping for a short period, allowing socketCallback to poll
   289  	// the context and exit.
   290  	time.Sleep(100 * time.Millisecond)
   291  
   292  	m.nfqueue.Close()
   293  
   294  	syscall.Close(m.injectIPv4FD)
   295  
   296  	if m.injectIPv6FD != 0 {
   297  		syscall.Close(m.injectIPv6FD)
   298  	}
   299  
   300  	m.configureIPTables(false)
   301  }
   302  
   303  // SetSpecs installs a new set of packet transformation Spec values, replacing
   304  // the initial specs from Config.Specs, or any previous SetSpecs call. When
   305  // SetSpecs returns an error, the previous set of specs is retained.
   306  func (m *Manipulator) SetSpecs(specs []*Spec) error {
   307  
   308  	compiledSpecs := make(map[string]*compiledSpec)
   309  	for _, spec := range specs {
   310  		if spec.Name == "" {
   311  			return errors.TraceNew("invalid spec name")
   312  		}
   313  		if _, ok := compiledSpecs[spec.Name]; ok {
   314  			return errors.TraceNew("duplicate spec name")
   315  		}
   316  		compiledSpec, err := compileSpec(spec)
   317  		if err != nil {
   318  			return errors.Trace(err)
   319  		}
   320  		compiledSpecs[spec.Name] = compiledSpec
   321  	}
   322  
   323  	m.compiledSpecsMutex.Lock()
   324  	m.compiledSpecs = compiledSpecs
   325  	m.compiledSpecsMutex.Unlock()
   326  
   327  	return nil
   328  }
   329  
   330  func makeConnectionID(
   331  	srcIP net.IP, srcPort uint16, dstIP net.IP, dstPort uint16) string {
   332  
   333  	// Create a unique connection ID, for appliedSpecCache, from the 4-tuple
   334  	// srcIP, dstIP, srcPort, dstPort. In the SYN/ACK context, src is the server
   335  	// and dst is the client.
   336  	//
   337  	// Limitation: there may be many repeat connections from one dstIP,
   338  	// especially if many clients are behind the same NAT. Each TCP connection
   339  	// will have a distinct dstPort. In principle, there remains a race between
   340  	// populating appliedSpecCache, the TCP connection terminating on the
   341  	// client-side and the NAT reusing the dstPort, and consuming
   342  	// appliedSpecCache.
   343  
   344  	// From: https://github.com/golang/go/blob/b88efc7e7ac15f9e0b5d8d9c82f870294f6a3839/src/net/ip.go#L55
   345  	var v4InV6Prefix = []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff}
   346  	const uint16Len = 2
   347  
   348  	var connID [net.IPv6len + uint16Len + net.IPv6len + uint16Len]byte
   349  
   350  	offset := 0
   351  	if len(srcIP) == net.IPv4len {
   352  		copy(connID[offset:], v4InV6Prefix)
   353  		offset += len(v4InV6Prefix)
   354  		copy(connID[offset:], srcIP)
   355  		offset += len(srcIP)
   356  	} else { // net.IPv6len
   357  		copy(connID[offset:], srcIP)
   358  		offset += len(srcIP)
   359  	}
   360  	binary.BigEndian.PutUint16(connID[offset:], srcPort)
   361  	offset += uint16Len
   362  
   363  	if len(dstIP) == net.IPv4len {
   364  		copy(connID[offset:], v4InV6Prefix)
   365  		offset += len(v4InV6Prefix)
   366  		copy(connID[offset:], dstIP)
   367  		offset += len(dstIP)
   368  	} else { // net.IPv6len
   369  		copy(connID[offset:], dstIP)
   370  		offset += len(dstIP)
   371  	}
   372  	binary.BigEndian.PutUint16(connID[offset:], dstPort)
   373  	offset += uint16Len
   374  
   375  	return string(connID[:])
   376  }
   377  
   378  type appliedSpec struct {
   379  	specName  string
   380  	extraData interface{}
   381  }
   382  
   383  // GetAppliedSpecName returns the packet manipulation spec name applied to the
   384  // TCP connection, represented by its local and remote address components,
   385  // that was ultimately accepted by a network listener. The second return value
   386  // is the arbitrary extra data returned by GetSpecName.
   387  //
   388  // This allows SelectSpecName, the spec selector, to be non-deterministic
   389  // while also allowing for accurate packet manipulation metrics to be
   390  // associated with each TCP connection.
   391  //
   392  // For a given connection, GetAppliedSpecName must be called before a TTL
   393  // clears the stored value. Calling GetAppliedSpecName immediately clears the
   394  // stored value for the given connection.
   395  //
   396  // To obtain the correct result GetAppliedSpecName must be called with a
   397  // RemoteAddr which reflects the true immediate network peer address. In
   398  // particular, for proxied net.Conns which present a synthetic RemoteAddr with
   399  // the original address of a proxied client (e.g., armon/go-proxyproto, or
   400  // psiphon/server.meekConn) the true peer RemoteAddr must instead be
   401  // provided.
   402  func (m *Manipulator) GetAppliedSpecName(
   403  	localAddr, remoteAddr *net.TCPAddr) (string, interface{}, error) {
   404  
   405  	connID := makeConnectionID(
   406  		localAddr.IP,
   407  		uint16(localAddr.Port),
   408  		remoteAddr.IP,
   409  		uint16(remoteAddr.Port))
   410  
   411  	value, found := m.appliedSpecCache.Get(connID)
   412  	if !found {
   413  		return "", nil, errors.TraceNew("connection not found")
   414  	}
   415  
   416  	appliedSpec := value.(appliedSpec)
   417  
   418  	m.appliedSpecCache.Delete(connID)
   419  
   420  	return appliedSpec.specName, appliedSpec.extraData, nil
   421  }
   422  
   423  func (m *Manipulator) setAppliedSpecName(
   424  	interceptedPacket gopacket.Packet,
   425  	specName string,
   426  	extraData interface{}) {
   427  
   428  	srcIP, dstIP, _, _ := m.getPacketAddressInfo(interceptedPacket)
   429  
   430  	interceptedTCP := interceptedPacket.Layer(layers.LayerTypeTCP).(*layers.TCP)
   431  
   432  	connID := makeConnectionID(
   433  		srcIP,
   434  		uint16(interceptedTCP.SrcPort),
   435  		dstIP,
   436  		uint16(interceptedTCP.DstPort))
   437  
   438  	m.appliedSpecCache.Set(
   439  		connID,
   440  		appliedSpec{
   441  			specName:  specName,
   442  			extraData: extraData,
   443  		},
   444  		cache.DefaultExpiration)
   445  }
   446  
   447  func (m *Manipulator) getSocketMark() int {
   448  	if m.config.SocketMark == 0 {
   449  		return defaultSocketMark
   450  	}
   451  	return m.config.SocketMark
   452  }
   453  
   454  func (m *Manipulator) handleInterceptedPacket(attr nfqueue.Attribute) int {
   455  
   456  	if attr.PacketID == nil || attr.Payload == nil {
   457  		m.config.Logger.WithTrace().Warning("missing nfqueue data")
   458  		return 0
   459  	}
   460  
   461  	// Trigger packet manipulation only if the packet is a SYN-ACK and has no
   462  	// payload (which a transformation _may_ discard). The iptables filter for
   463  	// NFQUEUE should already ensure that only SYN-ACK packets are sent through
   464  	// the queue. To avoid breaking all TCP connections in an unanticipated case,
   465  	// fail open -- allow the packet -- if these conditions are not met or if
   466  	// parsing the packet fails.
   467  
   468  	packet, err := m.parseInterceptedPacket(*attr.Payload)
   469  	if err != nil {
   470  
   471  		// Fail open in this case.
   472  		m.nfqueue.SetVerdict(*attr.PacketID, nfqueue.NfAccept)
   473  
   474  		m.config.Logger.WithTraceFields(
   475  			common.LogFields{"error": err}).Warning("unexpected packet")
   476  		return 0
   477  	}
   478  
   479  	spec, extraData, err := m.getCompiledSpec(packet)
   480  	if err != nil {
   481  
   482  		// Fail open in this case.
   483  		m.nfqueue.SetVerdict(*attr.PacketID, nfqueue.NfAccept)
   484  
   485  		m.config.Logger.WithTraceFields(
   486  			common.LogFields{"error": err}).Warning("get strategy failed")
   487  		return 0
   488  	}
   489  
   490  	// Call setAppliedSpecName cache _before_ accepting the packet or injecting
   491  	// manipulated packets to avoid a potential race in which the TCP handshake
   492  	// completes and GetAppliedSpecName is called before the cache is populated.
   493  
   494  	if spec == nil {
   495  
   496  		// No packet manipulation in this case.
   497  		m.setAppliedSpecName(packet, "", extraData)
   498  		m.nfqueue.SetVerdict(*attr.PacketID, nfqueue.NfAccept)
   499  		return 0
   500  	}
   501  
   502  	m.setAppliedSpecName(packet, spec.name, extraData)
   503  	m.nfqueue.SetVerdict(*attr.PacketID, nfqueue.NfDrop)
   504  
   505  	err = m.injectPackets(packet, spec)
   506  	if err != nil {
   507  		m.config.Logger.WithTraceFields(
   508  			common.LogFields{"error": err}).Warning("inject packets failed")
   509  		return 0
   510  	}
   511  
   512  	return 0
   513  }
   514  
   515  func (m *Manipulator) parseInterceptedPacket(packetData []byte) (gopacket.Packet, error) {
   516  
   517  	// Note that NFQUEUE doesn't send an Ethernet layer. This first layer is
   518  	// either IPv4 or IPv6.
   519  	//
   520  	// As we parse only one packet per TCP connection, we are not using the
   521  	// faster DecodingLayerParser API,
   522  	// https://godoc.org/github.com/google/gopacket#hdr-Fast_Decoding_With_DecodingLayerParser,
   523  	// or zero-copy approaches.
   524  	//
   525  	// TODO: use a stub gopacket.Decoder as the first layer to avoid the extra
   526  	// NewPacket call? Use distinct NFQUEUE queue numbers and nfqueue instances
   527  	// for IPv4 and IPv6?
   528  
   529  	packet := gopacket.NewPacket(packetData, layers.LayerTypeIPv4, gopacket.Default)
   530  
   531  	if packet.ErrorLayer() != nil {
   532  		packet = gopacket.NewPacket(packetData, layers.LayerTypeIPv6, gopacket.Default)
   533  	}
   534  
   535  	errLayer := packet.ErrorLayer()
   536  	if errLayer != nil {
   537  		return nil, errors.Trace(errLayer.Error())
   538  	}
   539  
   540  	// After this check, Layer([IPv4,IPv6]/TCP) return values are assumed to be
   541  	// non-nil and unchecked layer type assertions are assumed safe.
   542  
   543  	tcpLayer := packet.Layer(layers.LayerTypeTCP)
   544  	if tcpLayer == nil {
   545  		return nil, errors.TraceNew("missing TCP layer")
   546  	}
   547  
   548  	if packet.Layer(gopacket.LayerTypePayload) != nil {
   549  		return nil, errors.TraceNew("unexpected payload layer")
   550  	}
   551  
   552  	tcp := tcpLayer.(*layers.TCP)
   553  
   554  	// Any of the ECN TCP flags (https://tools.ietf.org/html/rfc3168 and
   555  	// rfc3540), ECE/CWR/NS, may be set in a SYN-ACK, and are retained.
   556  	//
   557  	// Limitation: these additional flags are retained as-is on injected packets
   558  	// only when no TCP flag transformation is applied.
   559  
   560  	if !tcp.SYN || !tcp.ACK ||
   561  		tcp.FIN || tcp.RST || tcp.PSH || tcp.URG {
   562  		return nil, errors.TraceNew("unexpected TCP flags")
   563  	}
   564  
   565  	stripEOLOption(packet)
   566  
   567  	return packet, nil
   568  }
   569  
   570  func (m *Manipulator) getCompiledSpec(
   571  	interceptedPacket gopacket.Packet) (*compiledSpec, interface{}, error) {
   572  
   573  	_, dstIP, _, _ := m.getPacketAddressInfo(interceptedPacket)
   574  
   575  	interceptedTCP := interceptedPacket.Layer(layers.LayerTypeTCP).(*layers.TCP)
   576  
   577  	protocolPort := interceptedTCP.SrcPort
   578  	clientIP := dstIP
   579  
   580  	specName, extraData := m.config.SelectSpecName(int(protocolPort), clientIP)
   581  	if specName == "" {
   582  		return nil, extraData, nil
   583  	}
   584  
   585  	// Concurrency note: m.compiledSpecs may be replaced by SetSpecs, but any
   586  	// reference to an individual compiledSpec remains valid; each compiledSpec
   587  	// is read-only.
   588  
   589  	m.compiledSpecsMutex.Lock()
   590  	spec, ok := m.compiledSpecs[specName]
   591  	m.compiledSpecsMutex.Unlock()
   592  
   593  	if !ok {
   594  		return nil, nil, errors.Tracef("invalid spec name: %s", specName)
   595  	}
   596  
   597  	return spec, extraData, nil
   598  }
   599  
   600  func (m *Manipulator) injectPackets(interceptedPacket gopacket.Packet, spec *compiledSpec) error {
   601  
   602  	// A sockAddr parameter with dstIP (but not port) set appears to be required
   603  	// even with the IP_HDRINCL socket option.
   604  
   605  	_, _, injectFD, sockAddr := m.getPacketAddressInfo(interceptedPacket)
   606  
   607  	injectPackets, err := spec.apply(interceptedPacket)
   608  	if err != nil {
   609  		return errors.Trace(err)
   610  	}
   611  
   612  	for _, injectPacket := range injectPackets {
   613  
   614  		err = syscall.Sendto(injectFD, injectPacket, 0, sockAddr)
   615  		if err != nil {
   616  			return errors.Trace(err)
   617  		}
   618  	}
   619  
   620  	return nil
   621  }
   622  
   623  func (m *Manipulator) getPacketAddressInfo(interceptedPacket gopacket.Packet) (net.IP, net.IP, int, syscall.Sockaddr) {
   624  
   625  	var srcIP, dstIP net.IP
   626  	var injectFD int
   627  	var sockAddr syscall.Sockaddr
   628  
   629  	ipv4Layer := interceptedPacket.Layer(layers.LayerTypeIPv4)
   630  	if ipv4Layer != nil {
   631  		interceptedIPv4 := ipv4Layer.(*layers.IPv4)
   632  		srcIP = interceptedIPv4.SrcIP
   633  		dstIP = interceptedIPv4.DstIP
   634  		injectFD = m.injectIPv4FD
   635  		var ipv4 [4]byte
   636  		copy(ipv4[:], interceptedIPv4.DstIP.To4())
   637  		sockAddr = &syscall.SockaddrInet4{Addr: ipv4, Port: 0}
   638  	} else {
   639  		interceptedIPv6 := interceptedPacket.Layer(layers.LayerTypeIPv6).(*layers.IPv6)
   640  		srcIP = interceptedIPv6.SrcIP
   641  		dstIP = interceptedIPv6.DstIP
   642  		injectFD = m.injectIPv6FD
   643  		var ipv6 [16]byte
   644  		copy(ipv6[:], interceptedIPv6.DstIP.To16())
   645  		sockAddr = &syscall.SockaddrInet6{Addr: ipv6, Port: 0}
   646  	}
   647  
   648  	return srcIP, dstIP, injectFD, sockAddr
   649  }
   650  
   651  func (m *Manipulator) configureIPTables(addRules bool) error {
   652  
   653  	execCommands := func(mode string) error {
   654  
   655  		ports := make([]string, len(m.config.ProtocolPorts))
   656  		for i, port := range m.config.ProtocolPorts {
   657  			ports[i] = strconv.Itoa(port)
   658  		}
   659  
   660  		socketMark := strconv.Itoa(m.getSocketMark())
   661  
   662  		args := []string{
   663  			mode, "OUTPUT",
   664  			"--protocol", "tcp",
   665  			"--match", "multiport",
   666  			"--source-ports", strings.Join(ports, ","),
   667  			"--match", "mark",
   668  			"!", "--mark", socketMark,
   669  			"--tcp-flags", "ALL", "SYN,ACK",
   670  			"-j", "NFQUEUE",
   671  			"--queue-bypass",
   672  			"--queue-num", strconv.Itoa(m.config.QueueNumber),
   673  		}
   674  
   675  		err := common.RunNetworkConfigCommand(
   676  			m.config.Logger,
   677  			m.config.SudoNetworkConfigCommands,
   678  			"iptables",
   679  			args...)
   680  		if mode != "-D" && err != nil {
   681  			return errors.Trace(err)
   682  		}
   683  
   684  		err = common.RunNetworkConfigCommand(
   685  			m.config.Logger,
   686  			m.config.SudoNetworkConfigCommands,
   687  			"ip6tables",
   688  			args...)
   689  		if mode != "-D" && err != nil {
   690  			if m.config.AllowNoIPv6NetworkConfiguration {
   691  				m.config.Logger.WithTraceFields(
   692  					common.LogFields{
   693  						"error": err}).Warning(
   694  					"configure IPv6 NFQUEUE failed")
   695  			} else {
   696  				return errors.Trace(err)
   697  			}
   698  		}
   699  
   700  		return nil
   701  	}
   702  
   703  	// To avoid duplicates, first try to drop existing rules, then add. Also try
   704  	// to revert any partial configuration in the case of an error.
   705  
   706  	_ = execCommands("-D")
   707  
   708  	if addRules {
   709  		err := execCommands("-I")
   710  		if err != nil {
   711  			_ = execCommands("-D")
   712  		}
   713  		return errors.Trace(err)
   714  	}
   715  
   716  	return nil
   717  }
   718  
   719  func newNfqueueLogger(logger common.Logger) *log.Logger {
   720  	return log.New(
   721  		&nfqueueLoggerWriter{logger: logger},
   722  		"nfqueue",
   723  		log.Lshortfile)
   724  }
   725  
   726  type nfqueueLoggerWriter struct {
   727  	logger common.Logger
   728  }
   729  
   730  func (n *nfqueueLoggerWriter) Write(p []byte) (int, error) {
   731  	n.logger.WithTraceFields(
   732  		common.LogFields{"log": string(p)}).Warning("nfqueue log")
   733  	return len(p), nil
   734  }