github.com/Psiphon-Labs/psiphon-tunnel-core@v2.0.28+incompatible/psiphon/common/tun/tun.go (about)

     1  /*
     2   * Copyright (c) 2017, Psiphon Inc.
     3   * All rights reserved.
     4   *
     5   * This program is free software: you can redistribute it and/or modify
     6   * it under the terms of the GNU General Public License as published by
     7   * the Free Software Foundation, either version 3 of the License, or
     8   * (at your option) any later version.
     9   *
    10   * This program is distributed in the hope that it will be useful,
    11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
    12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13   * GNU General Public License for more details.
    14   *
    15   * You should have received a copy of the GNU General Public License
    16   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17   *
    18   */
    19  
    20  // Copyright 2009 The Go Authors. All rights reserved.
    21  // Use of this source code is governed by a BSD-style
    22  // license that can be found in the LICENSE file.
    23  
    24  /*
    25  Package tun is an IP packet tunnel server and client. It supports tunneling
    26  both IPv4 and IPv6.
    27  
    28  	.........................................................       .-,(  ),-.
    29  	. [server]                                     .-----.  .    .-(          )-.
    30  	.                                              | NIC |<---->(    Internet    )
    31  	. .......................................      '-----'  .    '-(          ).-'
    32  	. . [packet tunnel daemon]              .         ^     .        '-.( ).-'
    33  	. .                                     .         |     .
    34  	. . ...........................         .         |     .
    35  	. . . [session]               .         .        NAT    .
    36  	. . .                         .         .         |     .
    37  	. . .                         .         .         v     .
    38  	. . .                         .         .       .---.   .
    39  	. . .                         .         .       | t |   .
    40  	. . .                         .         .       | u |   .
    41  	. . .                 .---.   .  .---.  .       | n |   .
    42  	. . .                 | q |   .  | d |  .       |   |   .
    43  	. . .                 | u |   .  | e |  .       | d |   .
    44  	. . .          .------| e |<-----| m |<---------| e |   .
    45  	. . .          |      | u |   .  | u |  .       | v |   .
    46  	. . .          |      | e |   .  | x |  .       | i |   .
    47  	. . .       rewrite   '---'   .  '---'  .       | c |   .
    48  	. . .          |              .         .       | e |   .
    49  	. . .          v              .         .       '---'   .
    50  	. . .     .---------.         .         .         ^     .
    51  	. . .     | channel |--rewrite--------------------'     .
    52  	. . .     '---------'         .         .               .
    53  	. . ...........^...............         .               .
    54  	. .............|.........................               .
    55  	...............|.........................................
    56  	               |
    57  	               | (typically via Internet)
    58  	               |
    59  	...............|.................
    60  	. [client]     |                .
    61  	.              |                .
    62  	. .............|............... .
    63  	. .            v              . .
    64  	. .       .---------.         . .
    65  	. .       | channel |         . .
    66  	. .       '---------'         . .
    67  	. .            ^              . .
    68  	. .............|............... .
    69  	.              v                .
    70  	.        .------------.         .
    71  	.        | tun device |         .
    72  	.        '------------'         .
    73  	.................................
    74  
    75  The client relays IP packets between a local tun device and a channel, which
    76  is a transport to the server. In Psiphon, the channel will be an SSH channel
    77  within an SSH connection to a Psiphon server.
    78  
    79  The server relays packets between each client and its own tun device. The
    80  server tun device is NATed to the Internet via an external network interface.
    81  In this way, client traffic is tunneled and will egress from the server host.
    82  
    83  Similar to a typical VPN, IP addresses are assigned to each client. Unlike
    84  a typical VPN, the assignment is not transmitted to the client. Instead, the
    85  server transparently rewrites the source addresses of client packets to
    86  the assigned IP address. The server also rewrites the destination address of
    87  certain DNS packets. The purpose of this is to allow clients to reconnect
    88  to different servers without having to tear down or change their local
    89  network configuration. Clients may configure their local tun device with an
    90  arbitrary IP address and a static DNS resolver address.
    91  
    92  The server uses the 24-bit 10.0.0.0/8 IPv4 private address space to maximize
    93  the number of addresses available, due to Psiphon client churn and minimum
    94  address lease time constraints. For IPv6, a 24-bit unique local space is used.
    95  When a client is allocated addresses, a unique, unused 24-bit "index" is
    96  reserved/leased. This index maps to and from IPv4 and IPv6 private addresses.
    97  The server multiplexes all client packets into a single tun device. When a
    98  packet is read, the destination address is used to map the packet back to the
    99  correct index, which maps back to the client.
   100  
   101  The server maintains client "sessions". A session maintains client IP
   102  address state and effectively holds the lease on assigned addresses. If a
   103  client is disconnected and quickly reconnects, it will resume its previous
   104  session, retaining its IP address and network connection states. Idle
   105  sessions with no client connection will eventually expire.
   106  
   107  Packet count and bytes transferred metrics are logged for each client session.
   108  
   109  The server integrates with and enforces Psiphon traffic rules and logging
   110  facilities. The server parses and validates packets. Client-to-client packets
   111  are not permitted. Only global unicast packets are permitted. Only TCP and UDP
   112  packets are permitted. The client also filters out, before sending, packets
   113  that the server won't route.
   114  
   115  Certain aspects of packet tunneling are outside the scope of this package;
   116  e.g, the Psiphon client and server are responsible for establishing an SSH
   117  channel and negotiating the correct MTU and DNS settings. The Psiphon
   118  server will call Server.ClientConnected when a client connects and establishes
   119  a packet tunnel channel; and Server.ClientDisconnected when the client closes
   120  the channel and/or disconnects.
   121  */
   122  package tun
   123  
   124  import (
   125  	"context"
   126  	"encoding/binary"
   127  	"fmt"
   128  	"io"
   129  	"math/rand"
   130  	"net"
   131  	"sync"
   132  	"sync/atomic"
   133  	"time"
   134  	"unsafe"
   135  
   136  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
   137  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
   138  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/monotime"
   139  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/prng"
   140  )
   141  
   142  const (
   143  	DEFAULT_MTU                          = 1500
   144  	DEFAULT_DOWNSTREAM_PACKET_QUEUE_SIZE = 32768 * 16
   145  	DEFAULT_UPSTREAM_PACKET_QUEUE_SIZE   = 32768
   146  	DEFAULT_IDLE_SESSION_EXPIRY_SECONDS  = 300
   147  	ORPHAN_METRICS_CHECKPOINTER_PERIOD   = 30 * time.Minute
   148  	FLOW_IDLE_EXPIRY                     = 60 * time.Second
   149  )
   150  
   151  // ServerConfig specifies the configuration of a packet tunnel server.
   152  type ServerConfig struct {
   153  
   154  	// Logger is used for logging events and metrics.
   155  	Logger common.Logger
   156  
   157  	// SudoNetworkConfigCommands specifies whether to use "sudo"
   158  	// when executing network configuration commands. This is required
   159  	// when the packet tunnel server is not run as root and when
   160  	// process capabilities are not available (only Linux kernel 4.3+
   161  	// has the required capabilities support). The host sudoers file
   162  	// must be configured to allow the tunnel server process user to
   163  	// execute the commands invoked in configureServerInterface; see
   164  	// the implementation for the appropriate platform.
   165  	SudoNetworkConfigCommands bool
   166  
   167  	// AllowNoIPv6NetworkConfiguration indicates that failures while
   168  	// configuring tun interfaces and routing for IPv6 are to be
   169  	// logged as warnings only. This option is intended to support
   170  	// test cases on hosts without IPv6 and is not for production use;
   171  	// the packet tunnel server will still accept IPv6 packets and
   172  	// relay them to the tun device.
   173  	// AllowNoIPv6NetworkConfiguration may not be supported on all
   174  	// platforms.
   175  	AllowNoIPv6NetworkConfiguration bool
   176  
   177  	// EgressInterface is the interface to which client traffic is
   178  	// masqueraded/NATed. For example, "eth0". If blank, a platform-
   179  	// appropriate default is used.
   180  	EgressInterface string
   181  
   182  	// GetDNSResolverIPv4Addresses is a function which returns the
   183  	// DNS resolvers to use as transparent DNS rewrite targets for
   184  	// IPv4 DNS traffic.
   185  	//
   186  	// GetDNSResolverIPv4Addresses is invoked for each new client
   187  	// session and the list of resolvers is stored with the session.
   188  	// This is a compromise between checking current resolvers for
   189  	// each packet (too expensive) and simply passing in a static
   190  	// list (won't pick up resolver changes). As implemented, only
   191  	// new client sessions will pick up resolver changes.
   192  	//
   193  	// Transparent DNS rewriting occurs when the client uses the
   194  	// specific, target transparent DNS addresses specified by
   195  	// GetTransparentDNSResolverIPv4/6Address.
   196  	//
   197  	// For outbound DNS packets with a target resolver IP address,
   198  	// a random resolver is selected and used for the rewrite.
   199  	// For inbound packets, _any_ resolver in the list is rewritten
   200  	// back to the target resolver IP address. As a side-effect,
   201  	// responses to client DNS packets originally destined for a
   202  	// resolver in GetDNSResolverIPv4Addresses will be lost.
   203  	GetDNSResolverIPv4Addresses func() []net.IP
   204  
   205  	// GetDNSResolverIPv6Addresses is a function which returns the
   206  	// DNS resolvers to use as transparent DNS rewrite targets for
   207  	// IPv6 DNS traffic. It functions like GetDNSResolverIPv4Addresses.
   208  	GetDNSResolverIPv6Addresses func() []net.IP
   209  
   210  	// EnableDNSFlowTracking specifies whether to apply flow tracking to DNS
   211  	// flows, as required for DNS quality metrics. Typically there are many
   212  	// short-lived DNS flows to track and each tracked flow adds some overhead,
   213  	// so this defaults to off.
   214  	EnableDNSFlowTracking bool
   215  
   216  	// DownstreamPacketQueueSize specifies the size of the downstream
   217  	// packet queue. The packet tunnel server multiplexes all client
   218  	// packets through a single tun device, so when a packet is read,
   219  	// it must be queued or dropped if it cannot be immediately routed
   220  	// to the appropriate client. Note that the TCP and SSH windows
   221  	// for the underlying channel transport will impact transfer rate
   222  	// and queuing.
   223  	// When DownstreamPacketQueueSize is 0, a default value tuned for
   224  	// Psiphon is used.
   225  	DownstreamPacketQueueSize int
   226  
   227  	// MTU specifies the maximum transmission unit for the packet
   228  	// tunnel. Clients must be configured with the same MTU. The
   229  	// server's tun device will be set to this MTU value and is
   230  	// assumed not to change for the duration of the server.
   231  	// When MTU is 0, a default value is used.
   232  	MTU int
   233  
   234  	// SessionIdleExpirySeconds specifies how long to retain client
   235  	// sessions which have no client attached. Sessions are retained
   236  	// across client connections so reconnecting clients can resume
   237  	// a previous session. Resuming avoids leasing new IP addresses
   238  	// for reconnection, and also retains NAT state for active
   239  	// tunneled connections.
   240  	//
   241  	// SessionIdleExpirySeconds is also, effectively, the lease
   242  	// time for assigned IP addresses.
   243  	SessionIdleExpirySeconds int
   244  
   245  	// AllowBogons disables bogon checks. This should be used only
   246  	// for testing.
   247  	AllowBogons bool
   248  }
   249  
   250  // Server is a packet tunnel server. A packet tunnel server
   251  // maintains client sessions, relays packets through client
   252  // channels, and multiplexes packets through a single tun
   253  // device. The server assigns IP addresses to clients, performs
   254  // IP address and transparent DNS rewriting, and enforces
   255  // traffic rules.
   256  type Server struct {
   257  	config              *ServerConfig
   258  	device              *Device
   259  	indexToSession      sync.Map
   260  	sessionIDToIndex    sync.Map
   261  	connectedInProgress *sync.WaitGroup
   262  	workers             *sync.WaitGroup
   263  	runContext          context.Context
   264  	stopRunning         context.CancelFunc
   265  	orphanMetrics       *packetMetrics
   266  }
   267  
   268  // NewServer initializes a server.
   269  func NewServer(config *ServerConfig) (*Server, error) {
   270  
   271  	device, err := NewServerDevice(config)
   272  	if err != nil {
   273  		return nil, errors.Trace(err)
   274  	}
   275  
   276  	runContext, stopRunning := context.WithCancel(context.Background())
   277  
   278  	return &Server{
   279  		config:              config,
   280  		device:              device,
   281  		connectedInProgress: new(sync.WaitGroup),
   282  		workers:             new(sync.WaitGroup),
   283  		runContext:          runContext,
   284  		stopRunning:         stopRunning,
   285  		orphanMetrics:       new(packetMetrics),
   286  	}, nil
   287  }
   288  
   289  // Start starts a server and returns with it running.
   290  func (server *Server) Start() {
   291  
   292  	server.config.Logger.WithTrace().Info("starting")
   293  
   294  	server.workers.Add(1)
   295  	go server.runSessionReaper()
   296  
   297  	server.workers.Add(1)
   298  	go server.runOrphanMetricsCheckpointer()
   299  
   300  	server.workers.Add(1)
   301  	go server.runDeviceDownstream()
   302  }
   303  
   304  // Stop halts a running server.
   305  func (server *Server) Stop() {
   306  
   307  	server.config.Logger.WithTrace().Info("stopping")
   308  
   309  	server.stopRunning()
   310  
   311  	// Interrupt blocked device read/writes.
   312  	server.device.Close()
   313  
   314  	// Wait for any in-progress ClientConnected calls to complete.
   315  	server.connectedInProgress.Wait()
   316  
   317  	// After this point, no further clients will be added: all
   318  	// in-progress ClientConnected calls have finished; and any
   319  	// later ClientConnected calls won't get past their
   320  	// server.runContext.Done() checks.
   321  
   322  	// Close all clients. Client workers will be joined
   323  	// by the following server.workers.Wait().
   324  	server.indexToSession.Range(func(_, value interface{}) bool {
   325  		session := value.(*session)
   326  		server.interruptSession(session)
   327  		return true
   328  	})
   329  
   330  	server.workers.Wait()
   331  
   332  	server.config.Logger.WithTrace().Info("stopped")
   333  }
   334  
   335  // AllowedPortChecker is a function which returns true when it is
   336  // permitted to relay packets to the specified upstream IP address
   337  // and/or port.
   338  type AllowedPortChecker func(upstreamIPAddress net.IP, port int) bool
   339  
   340  // AllowedDomainChecker is a function which returns true when it is
   341  // permitted to resolve the specified domain name.
   342  type AllowedDomainChecker func(string) bool
   343  
   344  // FlowActivityUpdater defines an interface for receiving updates for
   345  // flow activity. Values passed to UpdateProgress are bytes transferred
   346  // and flow duration since the previous UpdateProgress.
   347  type FlowActivityUpdater interface {
   348  	UpdateProgress(downstreamBytes, upstreamBytes, durationNanoseconds int64)
   349  }
   350  
   351  // FlowActivityUpdaterMaker is a function which returns a list of
   352  // appropriate updaters for a new flow to the specified upstream
   353  // hostname (if known -- may be ""), and IP address.
   354  // The flow is TCP when isTCP is true, and UDP otherwise.
   355  type FlowActivityUpdaterMaker func(
   356  	isTCP bool, upstreamHostname string, upstreamIPAddress net.IP) []FlowActivityUpdater
   357  
   358  // MetricsUpdater is a function which receives a checkpoint summary
   359  // of application bytes transferred through a packet tunnel.
   360  type MetricsUpdater func(
   361  	TCPApplicationBytesDown, TCPApplicationBytesUp,
   362  	UDPApplicationBytesDown, UDPApplicationBytesUp int64)
   363  
   364  // DNSQualityReporter is a function which receives a DNS quality report:
   365  // whether a DNS request received a reponse, the elapsed time, and the
   366  // resolver used.
   367  type DNSQualityReporter func(
   368  	receivedResponse bool, requestDuration time.Duration, resolverIP net.IP)
   369  
   370  // ClientConnected handles new client connections, creating or resuming
   371  // a session and returns with client packet handlers running.
   372  //
   373  // sessionID is used to identify sessions for resumption.
   374  //
   375  // transport provides the channel for relaying packets to and from
   376  // the client.
   377  //
   378  // checkAllowedTCPPortFunc/checkAllowedUDPPortFunc/checkAllowedDomainFunc
   379  // are callbacks used to enforce traffic rules. For each TCP/UDP flow, the
   380  // corresponding AllowedPort function is called to check if traffic to the
   381  // packet's port is permitted. For upstream DNS query packets,
   382  // checkAllowedDomainFunc is called to check if domain resolution is
   383  // permitted. These callbacks must be efficient and safe for concurrent
   384  // calls.
   385  //
   386  // flowActivityUpdaterMaker is a callback invoked for each new packet
   387  // flow; it may create updaters to track flow activity.
   388  //
   389  // metricsUpdater is a callback invoked at metrics checkpoints (usually
   390  // when the client disconnects) with a summary of application bytes
   391  // transferred.
   392  //
   393  // It is safe to make concurrent calls to ClientConnected for distinct
   394  // session IDs. The caller is responsible for serializing calls with the
   395  // same session ID. Further, the caller must ensure, in the case of a client
   396  // transport reconnect when an existing transport has not yet disconnected,
   397  // that ClientDisconnected is called first -- so it doesn't undo the new
   398  // ClientConnected. (psiphond meets these constraints by closing any
   399  // existing SSH client with duplicate session ID early in the lifecycle of
   400  // a new SSH client connection.)
   401  func (server *Server) ClientConnected(
   402  	sessionID string,
   403  	transport io.ReadWriteCloser,
   404  	checkAllowedTCPPortFunc, checkAllowedUDPPortFunc AllowedPortChecker,
   405  	checkAllowedDomainFunc AllowedDomainChecker,
   406  	flowActivityUpdaterMaker FlowActivityUpdaterMaker,
   407  	metricsUpdater MetricsUpdater,
   408  	dnsQualityReporter DNSQualityReporter) error {
   409  
   410  	// It's unusual to call both sync.WaitGroup.Add() _and_ Done() in the same
   411  	// goroutine. There's no other place to call Add() since ClientConnected is
   412  	// an API entrypoint. And Done() works because the invariant enforced by
   413  	// connectedInProgress.Wait() is not that no ClientConnected calls are in
   414  	// progress, but that no such calls are in progress past the
   415  	// server.runContext.Done() check.
   416  
   417  	// TODO: will this violate https://golang.org/pkg/sync/#WaitGroup.Add:
   418  	// "calls with a positive delta that occur when the counter is zero must happen before a Wait"?
   419  
   420  	server.connectedInProgress.Add(1)
   421  	defer server.connectedInProgress.Done()
   422  
   423  	select {
   424  	case <-server.runContext.Done():
   425  		return errors.TraceNew("server stopping")
   426  	default:
   427  	}
   428  
   429  	server.config.Logger.WithTraceFields(
   430  		common.LogFields{"sessionID": sessionID}).Debug("client connected")
   431  
   432  	MTU := getMTU(server.config.MTU)
   433  
   434  	clientSession := server.getSession(sessionID)
   435  
   436  	if clientSession != nil {
   437  
   438  		// Call interruptSession to ensure session is in the
   439  		// expected idle state.
   440  
   441  		server.interruptSession(clientSession)
   442  
   443  		// Note: we don't check the session expiry; whether it has
   444  		// already expired and not yet been reaped; or is about
   445  		// to expire very shortly. It could happen that the reaper
   446  		// will kill this session between now and when the expiry
   447  		// is reset in the following resumeSession call. In this
   448  		// unlikely case, the packet tunnel client should reconnect.
   449  
   450  	} else {
   451  
   452  		// Store IPv4 resolver addresses in 4-byte representation
   453  		// for use in rewritting.
   454  		resolvers := server.config.GetDNSResolverIPv4Addresses()
   455  		DNSResolverIPv4Addresses := make([]net.IP, len(resolvers))
   456  		for i, resolver := range resolvers {
   457  			// Assumes To4 is non-nil
   458  			DNSResolverIPv4Addresses[i] = resolver.To4()
   459  		}
   460  
   461  		clientSession = &session{
   462  			allowBogons:              server.config.AllowBogons,
   463  			lastActivity:             int64(monotime.Now()),
   464  			sessionID:                sessionID,
   465  			metrics:                  new(packetMetrics),
   466  			enableDNSFlowTracking:    server.config.EnableDNSFlowTracking,
   467  			DNSResolverIPv4Addresses: append([]net.IP(nil), DNSResolverIPv4Addresses...),
   468  			DNSResolverIPv6Addresses: append([]net.IP(nil), server.config.GetDNSResolverIPv6Addresses()...),
   469  			workers:                  new(sync.WaitGroup),
   470  		}
   471  
   472  		// One-time, for this session, random resolver selection for TCP transparent
   473  		// DNS forwarding. See comment in processPacket.
   474  		if len(clientSession.DNSResolverIPv4Addresses) > 0 {
   475  			clientSession.TCPDNSResolverIPv4Index = prng.Intn(len(clientSession.DNSResolverIPv4Addresses))
   476  		}
   477  		if len(clientSession.DNSResolverIPv6Addresses) > 0 {
   478  			clientSession.TCPDNSResolverIPv6Index = prng.Intn(len(clientSession.DNSResolverIPv6Addresses))
   479  		}
   480  
   481  		// allocateIndex initializes session.index, session.assignedIPv4Address,
   482  		// and session.assignedIPv6Address; and updates server.indexToSession and
   483  		// server.sessionIDToIndex.
   484  
   485  		err := server.allocateIndex(clientSession)
   486  		if err != nil {
   487  			return errors.Trace(err)
   488  		}
   489  	}
   490  
   491  	// Note: it's possible that a client disconnects (or reconnects before a
   492  	// disconnect is detected) and interruptSession is called between
   493  	// allocateIndex and resumeSession calls here, so interruptSession and
   494  	// related code must not assume resumeSession has been called.
   495  
   496  	server.resumeSession(
   497  		clientSession,
   498  		NewChannel(transport, MTU),
   499  		checkAllowedTCPPortFunc,
   500  		checkAllowedUDPPortFunc,
   501  		checkAllowedDomainFunc,
   502  		flowActivityUpdaterMaker,
   503  		metricsUpdater,
   504  		dnsQualityReporter)
   505  
   506  	return nil
   507  }
   508  
   509  // ClientDisconnected handles clients disconnecting. Packet handlers
   510  // are halted, but the client session is left intact to reserve the
   511  // assigned IP addresses and retain network state in case the client
   512  // soon reconnects.
   513  func (server *Server) ClientDisconnected(sessionID string) {
   514  
   515  	session := server.getSession(sessionID)
   516  	if session != nil {
   517  
   518  		server.config.Logger.WithTraceFields(
   519  			common.LogFields{"sessionID": sessionID}).Debug("client disconnected")
   520  
   521  		server.interruptSession(session)
   522  	}
   523  }
   524  
   525  func (server *Server) getSession(sessionID string) *session {
   526  
   527  	if index, ok := server.sessionIDToIndex.Load(sessionID); ok {
   528  		s, ok := server.indexToSession.Load(index.(int32))
   529  		if ok {
   530  			return s.(*session)
   531  		}
   532  		server.config.Logger.WithTrace().Warning("unexpected missing session")
   533  	}
   534  	return nil
   535  }
   536  
   537  func (server *Server) resumeSession(
   538  	session *session,
   539  	channel *Channel,
   540  	checkAllowedTCPPortFunc, checkAllowedUDPPortFunc AllowedPortChecker,
   541  	checkAllowedDomainFunc AllowedDomainChecker,
   542  	flowActivityUpdaterMaker FlowActivityUpdaterMaker,
   543  	metricsUpdater MetricsUpdater,
   544  	dnsQualityReporter DNSQualityReporter) {
   545  
   546  	session.mutex.Lock()
   547  	defer session.mutex.Unlock()
   548  
   549  	// Performance/concurrency note: the downstream packet queue
   550  	// and various packet event callbacks may be accessed while
   551  	// the session is idle, via the runDeviceDownstream goroutine,
   552  	// which runs concurrent to resumeSession/interruptSession calls.
   553  	// Consequently, all accesses to these fields must be
   554  	// synchronized.
   555  	//
   556  	// Benchmarking indicates the atomic.LoadPointer mechanism
   557  	// outperforms a mutex; approx. 2 ns/op vs. 20 ns/op in the case
   558  	// of getCheckAllowedTCPPortFunc. Since these accesses occur
   559  	// multiple times per packet, atomic.LoadPointer is used and so
   560  	// each of these fields is an unsafe.Pointer in the session
   561  	// struct.
   562  
   563  	// Begin buffering downstream packets.
   564  
   565  	downstreamPacketQueueSize := DEFAULT_DOWNSTREAM_PACKET_QUEUE_SIZE
   566  	if server.config.DownstreamPacketQueueSize > 0 {
   567  		downstreamPacketQueueSize = server.config.DownstreamPacketQueueSize
   568  	}
   569  	downstreamPackets := NewPacketQueue(downstreamPacketQueueSize)
   570  
   571  	session.setDownstreamPackets(downstreamPackets)
   572  
   573  	// Set new access control, flow monitoring, and metrics
   574  	// callbacks; all associated with the new client connection.
   575  
   576  	// IMPORTANT: any new callbacks or references to the outer client added
   577  	// here must be cleared in interruptSession to ensure that a paused
   578  	// session does not retain references to old client connection objects
   579  	// after the client disconnects.
   580  
   581  	session.setCheckAllowedTCPPortFunc(&checkAllowedTCPPortFunc)
   582  
   583  	session.setCheckAllowedUDPPortFunc(&checkAllowedUDPPortFunc)
   584  
   585  	session.setCheckAllowedDomainFunc(&checkAllowedDomainFunc)
   586  
   587  	session.setFlowActivityUpdaterMaker(&flowActivityUpdaterMaker)
   588  
   589  	session.setMetricsUpdater(&metricsUpdater)
   590  
   591  	session.setDNSQualityReporter(&dnsQualityReporter)
   592  
   593  	session.channel = channel
   594  
   595  	// Parent context is not server.runContext so that session workers
   596  	// need only check session.stopRunning to act on shutdown events.
   597  	session.runContext, session.stopRunning = context.WithCancel(context.Background())
   598  
   599  	// When a session is interrupted, all goroutines in session.workers
   600  	// are joined. When the server is stopped, all goroutines in
   601  	// server.workers are joined. So, in both cases we synchronously
   602  	// stop all workers associated with this session.
   603  
   604  	session.workers.Add(1)
   605  	go server.runClientUpstream(session)
   606  
   607  	session.workers.Add(1)
   608  	go server.runClientDownstream(session)
   609  
   610  	session.touch()
   611  }
   612  
   613  func (server *Server) interruptSession(session *session) {
   614  
   615  	session.mutex.Lock()
   616  	defer session.mutex.Unlock()
   617  
   618  	wasRunning := (session.channel != nil)
   619  
   620  	if session.stopRunning != nil {
   621  		session.stopRunning()
   622  	}
   623  
   624  	if session.channel != nil {
   625  		// Interrupt blocked channel read/writes.
   626  		session.channel.Close()
   627  	}
   628  
   629  	session.workers.Wait()
   630  
   631  	if session.channel != nil {
   632  		// Don't hold a reference to channel, allowing both it and
   633  		// its conn to be garbage collected.
   634  		// Setting channel to nil must happen after workers.Wait()
   635  		// to ensure no goroutine remains which may access
   636  		// session.channel.
   637  		session.channel = nil
   638  	}
   639  
   640  	metricsUpdater := session.getMetricsUpdater()
   641  
   642  	// interruptSession may be called for idle sessions, to ensure
   643  	// the session is in an expected state: in ClientConnected,
   644  	// and in server.Stop(); don't log in those cases.
   645  	if wasRunning {
   646  		session.metrics.checkpoint(
   647  			server.config.Logger,
   648  			metricsUpdater,
   649  			"server_packet_metrics",
   650  			packetMetricsAll)
   651  	}
   652  
   653  	// Release the downstream packet buffer, so the associated
   654  	// memory is not consumed while no client is connected.
   655  	//
   656  	// Since runDeviceDownstream continues to run and will access
   657  	// session.downstreamPackets, an atomic pointer is used to
   658  	// synchronize access.
   659  	session.setDownstreamPackets(nil)
   660  
   661  	session.setCheckAllowedTCPPortFunc(nil)
   662  
   663  	session.setCheckAllowedUDPPortFunc(nil)
   664  
   665  	session.setCheckAllowedDomainFunc(nil)
   666  
   667  	session.setFlowActivityUpdaterMaker(nil)
   668  
   669  	session.setMetricsUpdater(nil)
   670  
   671  	session.setDNSQualityReporter(nil)
   672  }
   673  
   674  func (server *Server) runSessionReaper() {
   675  
   676  	defer server.workers.Done()
   677  
   678  	// Periodically iterate over all sessions and discard expired
   679  	// sessions. This action, removing the index from server.indexToSession,
   680  	// releases the IP addresses assigned  to the session.
   681  
   682  	// TODO: As-is, this will discard sessions for live SSH tunnels,
   683  	// as long as the SSH channel for such a session has been idle for
   684  	// a sufficient period. Should the session be retained as long as
   685  	// the SSH tunnel is alive (e.g., expose and call session.touch()
   686  	// on keepalive events)? Or is it better to free up resources held
   687  	// by idle sessions?
   688  
   689  	idleExpiry := server.sessionIdleExpiry()
   690  
   691  	ticker := time.NewTicker(idleExpiry / 2)
   692  	defer ticker.Stop()
   693  
   694  	for {
   695  		select {
   696  		case <-ticker.C:
   697  			server.indexToSession.Range(func(_, value interface{}) bool {
   698  				session := value.(*session)
   699  				if session.expired(idleExpiry) {
   700  					server.removeSession(session)
   701  				}
   702  				return true
   703  			})
   704  		case <-server.runContext.Done():
   705  			return
   706  		}
   707  	}
   708  }
   709  
   710  func (server *Server) sessionIdleExpiry() time.Duration {
   711  	sessionIdleExpirySeconds := DEFAULT_IDLE_SESSION_EXPIRY_SECONDS
   712  	if server.config.SessionIdleExpirySeconds > 2 {
   713  		sessionIdleExpirySeconds = server.config.SessionIdleExpirySeconds
   714  	}
   715  	return time.Duration(sessionIdleExpirySeconds) * time.Second
   716  }
   717  
   718  func (server *Server) removeSession(session *session) {
   719  	server.sessionIDToIndex.Delete(session.sessionID)
   720  	server.indexToSession.Delete(session.index)
   721  	server.interruptSession(session)
   722  
   723  	// Delete flows to ensure any pending flow metrics are reported.
   724  	session.deleteFlows()
   725  }
   726  
   727  func (server *Server) runOrphanMetricsCheckpointer() {
   728  
   729  	defer server.workers.Done()
   730  
   731  	// Periodically log orphan packet metrics. Orphan metrics
   732  	// are not associated with any session. This includes
   733  	// packets that are rejected before they can be associated
   734  	// with a session.
   735  
   736  	ticker := time.NewTicker(ORPHAN_METRICS_CHECKPOINTER_PERIOD)
   737  	defer ticker.Stop()
   738  
   739  	for {
   740  		done := false
   741  		select {
   742  		case <-ticker.C:
   743  		case <-server.runContext.Done():
   744  			done = true
   745  		}
   746  
   747  		// TODO: skip log if all zeros?
   748  		server.orphanMetrics.checkpoint(
   749  			server.config.Logger,
   750  			nil,
   751  			"server_orphan_packet_metrics",
   752  			packetMetricsRejected)
   753  		if done {
   754  			return
   755  		}
   756  	}
   757  }
   758  
   759  func (server *Server) runDeviceDownstream() {
   760  
   761  	defer server.workers.Done()
   762  
   763  	// Read incoming packets from the tun device, parse and validate the
   764  	// packets, map them to a session/client, perform rewriting, and relay
   765  	// the packets to the client.
   766  
   767  	for {
   768  		readPacket, err := server.device.ReadPacket()
   769  
   770  		select {
   771  		case <-server.runContext.Done():
   772  			// No error is logged as shutdown may have interrupted read.
   773  			return
   774  		default:
   775  		}
   776  
   777  		if err != nil {
   778  			server.config.Logger.WithTraceFields(
   779  				common.LogFields{"error": err}).Warning("read device packet failed")
   780  			// May be temporary error condition, keep reading.
   781  			continue
   782  		}
   783  
   784  		// destinationIPAddress determines which client receives this packet.
   785  		// At this point, only enough of the packet is inspected to determine
   786  		// this routing info; further validation happens in subsequent
   787  		// processPacket in runClientDownstream.
   788  
   789  		// Note that masquerading/NAT stands between the Internet and the tun
   790  		// device, so arbitrary packets cannot be sent through to this point.
   791  
   792  		// TODO: getPacketDestinationIPAddress and processPacket perform redundant
   793  		// packet parsing; refactor to avoid extra work?
   794  
   795  		destinationIPAddress, ok := getPacketDestinationIPAddress(
   796  			server.orphanMetrics, packetDirectionServerDownstream, readPacket)
   797  
   798  		if !ok {
   799  			// Packet is dropped. Reason will be counted in orphan metrics.
   800  			continue
   801  		}
   802  
   803  		// Map destination IP address to client session.
   804  
   805  		index := server.convertIPAddressToIndex(destinationIPAddress)
   806  		s, ok := server.indexToSession.Load(index)
   807  
   808  		if !ok {
   809  			server.orphanMetrics.rejectedPacket(
   810  				packetDirectionServerDownstream, packetRejectNoSession)
   811  			continue
   812  		}
   813  
   814  		session := s.(*session)
   815  
   816  		downstreamPackets := session.getDownstreamPackets()
   817  
   818  		// No downstreamPackets buffer is maintained when no client is
   819  		// connected, so the packet is dropped.
   820  
   821  		if downstreamPackets == nil {
   822  			server.orphanMetrics.rejectedPacket(
   823  				packetDirectionServerDownstream, packetRejectNoClient)
   824  			continue
   825  		}
   826  
   827  		// Simply enqueue the packet for client handling, and move on to
   828  		// read the next packet. The packet tunnel server multiplexes all
   829  		// client packets through a single tun device, so we must not block
   830  		// on client channel I/O here.
   831  		//
   832  		// When the queue is full, the packet is dropped. This is standard
   833  		// behavior for routers, VPN servers, etc.
   834  		//
   835  		// TODO: processPacket is performed here, instead of runClientDownstream,
   836  		// since packets are packed contiguously into the packet queue and if
   837  		// the packet it to be omitted, that should be done before enqueuing.
   838  		// The potential downside is that all packet processing is done in this
   839  		// single thread of execution, blocking the next packet for the next
   840  		// client. Try handing off the packet to another worker which will
   841  		// call processPacket and Enqueue?
   842  
   843  		// In downstream mode, processPacket rewrites the destination address
   844  		// to the original client source IP address, and also rewrites DNS
   845  		// packets. As documented in runClientUpstream, the original address
   846  		// should already be populated via an upstream packet; if not, the
   847  		// packet will be rejected.
   848  
   849  		if !processPacket(
   850  			session.metrics,
   851  			session,
   852  			packetDirectionServerDownstream,
   853  			readPacket) {
   854  			// Packet is rejected and dropped. Reason will be counted in metrics.
   855  			continue
   856  		}
   857  
   858  		downstreamPackets.Enqueue(readPacket)
   859  	}
   860  }
   861  
   862  func (server *Server) runClientUpstream(session *session) {
   863  
   864  	defer session.workers.Done()
   865  
   866  	// Read incoming packets from the client channel, validate the packets,
   867  	// perform rewriting, and send them through to the tun device.
   868  
   869  	for {
   870  		readPacket, err := session.channel.ReadPacket()
   871  
   872  		select {
   873  		case <-session.runContext.Done():
   874  			// No error is logged as shutdown may have interrupted read.
   875  			return
   876  		default:
   877  		}
   878  
   879  		if err != nil {
   880  
   881  			// Debug since channel I/O errors occur during normal operation.
   882  			server.config.Logger.WithTraceFields(
   883  				common.LogFields{"error": err}).Debug("read channel packet failed")
   884  
   885  			// Tear down the session. Must be invoked asynchronously.
   886  			go server.interruptSession(session)
   887  
   888  			return
   889  		}
   890  
   891  		session.touch()
   892  
   893  		// processPacket transparently rewrites the source address to the
   894  		// session's assigned address and rewrites the destination of any
   895  		// DNS packets destined to the target DNS resolver.
   896  		//
   897  		// The first time the source address is rewritten, the original
   898  		// value is recorded so inbound packets can have the reverse
   899  		// rewrite applied. This assumes that the client will send a
   900  		// packet before receiving any packet, which is the case since
   901  		// only clients can initiate TCP or UDP connections or flows.
   902  
   903  		if !processPacket(
   904  			session.metrics,
   905  			session,
   906  			packetDirectionServerUpstream,
   907  			readPacket) {
   908  
   909  			// Packet is rejected and dropped. Reason will be counted in metrics.
   910  			continue
   911  		}
   912  
   913  		err = server.device.WritePacket(readPacket)
   914  
   915  		if err != nil {
   916  			server.config.Logger.WithTraceFields(
   917  				common.LogFields{"error": err}).Warning("write device packet failed")
   918  			// May be temporary error condition, keep working. The packet is
   919  			// most likely dropped.
   920  			continue
   921  		}
   922  	}
   923  }
   924  
   925  func (server *Server) runClientDownstream(session *session) {
   926  
   927  	defer session.workers.Done()
   928  
   929  	// Dequeue, process, and relay packets to be sent to the client channel.
   930  
   931  	for {
   932  
   933  		downstreamPackets := session.getDownstreamPackets()
   934  		// Note: downstreamPackets will not be nil, since this goroutine only
   935  		// runs while the session has a connected client.
   936  
   937  		packetBuffer, ok := downstreamPackets.DequeueFramedPackets(session.runContext)
   938  		if !ok {
   939  			// Dequeue aborted due to session.runContext.Done()
   940  			return
   941  		}
   942  
   943  		err := session.channel.WriteFramedPackets(packetBuffer)
   944  		if err != nil {
   945  
   946  			// Debug since channel I/O errors occur during normal operation.
   947  			server.config.Logger.WithTraceFields(
   948  				common.LogFields{"error": err}).Debug("write channel packets failed")
   949  
   950  			downstreamPackets.Replace(packetBuffer)
   951  
   952  			// Tear down the session. Must be invoked asynchronously.
   953  			go server.interruptSession(session)
   954  
   955  			return
   956  		}
   957  
   958  		session.touch()
   959  
   960  		downstreamPackets.Replace(packetBuffer)
   961  	}
   962  }
   963  
   964  var (
   965  	serverIPv4AddressCIDR             = "10.0.0.1/8"
   966  	transparentDNSResolverIPv4Address = net.ParseIP("10.0.0.2").To4() // 4-byte for rewriting
   967  	_, privateSubnetIPv4, _           = net.ParseCIDR("10.0.0.0/8")
   968  	assignedIPv4AddressTemplate       = "10.%d.%d.%d"
   969  
   970  	serverIPv6AddressCIDR             = "fd19:ca83:e6d5:1c44:0000:0000:0000:0001/64"
   971  	transparentDNSResolverIPv6Address = net.ParseIP("fd19:ca83:e6d5:1c44:0000:0000:0000:0002")
   972  	_, privateSubnetIPv6, _           = net.ParseCIDR("fd19:ca83:e6d5:1c44::/64")
   973  	assignedIPv6AddressTemplate       = "fd19:ca83:e6d5:1c44:8c57:4434:ee%02x:%02x%02x"
   974  )
   975  
   976  func (server *Server) allocateIndex(newSession *session) error {
   977  
   978  	// Find and assign an available index in the 24-bit index space.
   979  	// The index directly maps to and so determines the assigned
   980  	// IPv4 and IPv6 addresses.
   981  
   982  	// Search is a random index selection followed by a linear probe.
   983  	// TODO: is this the most effective (fast on average, simple) algorithm?
   984  
   985  	max := 0x00FFFFFF
   986  
   987  	randomInt := prng.Intn(max + 1)
   988  
   989  	index := int32(randomInt)
   990  	index &= int32(max)
   991  
   992  	idleExpiry := server.sessionIdleExpiry()
   993  
   994  	for tries := 0; tries < 100000; index++ {
   995  
   996  		tries++
   997  
   998  		// The index/address space isn't exactly 24-bits:
   999  		// - 0 and 0x00FFFFFF are reserved since they map to
  1000  		//   the network identifier (10.0.0.0) and broadcast
  1001  		//   address (10.255.255.255) respectively
  1002  		// - 1 is reserved as the server tun device address,
  1003  		//   (10.0.0.1, and IPv6 equivalent)
  1004  		// - 2 is reserved as the transparent DNS target
  1005  		//   address (10.0.0.2, and IPv6 equivalent)
  1006  
  1007  		if index <= 2 {
  1008  			continue
  1009  		}
  1010  		if index == 0x00FFFFFF {
  1011  			index = 0
  1012  			continue
  1013  		}
  1014  
  1015  		IPv4Address := server.convertIndexToIPv4Address(index).To4()
  1016  		IPv6Address := server.convertIndexToIPv6Address(index)
  1017  
  1018  		// Ensure that the index converts to valid IPs. This is not expected
  1019  		// to fail, but continuing with nil IPs will silently misroute
  1020  		// packets with rewritten source IPs.
  1021  		if IPv4Address == nil || IPv6Address == nil {
  1022  			server.config.Logger.WithTraceFields(
  1023  				common.LogFields{"index": index}).Warning("convert index to IP address failed")
  1024  			continue
  1025  		}
  1026  
  1027  		if s, ok := server.indexToSession.LoadOrStore(index, newSession); ok {
  1028  			// Index is already in use or acquired concurrently.
  1029  			// If the existing session is expired, reap it and try again
  1030  			// to acquire it.
  1031  			existingSession := s.(*session)
  1032  			if existingSession.expired(idleExpiry) {
  1033  				server.removeSession(existingSession)
  1034  				// Try to acquire this index again. We can't fall through and
  1035  				// use this index as removeSession has cleared indexToSession.
  1036  				index--
  1037  			}
  1038  			continue
  1039  		}
  1040  
  1041  		// Note: the To4() for assignedIPv4Address is essential since
  1042  		// that address value is assumed to be 4 bytes when rewriting.
  1043  
  1044  		newSession.index = index
  1045  		newSession.assignedIPv4Address = IPv4Address
  1046  		newSession.assignedIPv6Address = IPv6Address
  1047  		server.sessionIDToIndex.Store(newSession.sessionID, index)
  1048  
  1049  		server.resetRouting(newSession.assignedIPv4Address, newSession.assignedIPv6Address)
  1050  
  1051  		return nil
  1052  	}
  1053  
  1054  	return errors.TraceNew("unallocated index not found")
  1055  }
  1056  
  1057  func (server *Server) resetRouting(IPv4Address, IPv6Address net.IP) {
  1058  
  1059  	// Attempt to clear the NAT table of any existing connection
  1060  	// states. This will prevent the (already unlikely) delivery
  1061  	// of packets to the wrong client when an assigned IP address is
  1062  	// recycled. Silently has no effect on some platforms, see
  1063  	// resetNATTables implementations.
  1064  
  1065  	err := resetNATTables(server.config, IPv4Address)
  1066  	if err != nil {
  1067  		server.config.Logger.WithTraceFields(
  1068  			common.LogFields{"error": err}).Warning("reset IPv4 routing failed")
  1069  
  1070  	}
  1071  
  1072  	err = resetNATTables(server.config, IPv6Address)
  1073  	if err != nil {
  1074  		server.config.Logger.WithTraceFields(
  1075  			common.LogFields{"error": err}).Warning("reset IPv6 routing failed")
  1076  
  1077  	}
  1078  }
  1079  
  1080  func (server *Server) convertIPAddressToIndex(IP net.IP) int32 {
  1081  	// Assumes IP is at least 3 bytes.
  1082  	size := len(IP)
  1083  	return int32(IP[size-3])<<16 | int32(IP[size-2])<<8 | int32(IP[size-1])
  1084  }
  1085  
  1086  func (server *Server) convertIndexToIPv4Address(index int32) net.IP {
  1087  	return net.ParseIP(
  1088  		fmt.Sprintf(
  1089  			assignedIPv4AddressTemplate,
  1090  			(index>>16)&0xFF,
  1091  			(index>>8)&0xFF,
  1092  			index&0xFF))
  1093  }
  1094  
  1095  func (server *Server) convertIndexToIPv6Address(index int32) net.IP {
  1096  	return net.ParseIP(
  1097  		fmt.Sprintf(
  1098  			assignedIPv6AddressTemplate,
  1099  			(index>>16)&0xFF,
  1100  			(index>>8)&0xFF,
  1101  			index&0xFF))
  1102  }
  1103  
  1104  // GetTransparentDNSResolverIPv4Address returns the static IPv4 address
  1105  // to use as a DNS resolver when transparent DNS rewriting is desired.
  1106  func GetTransparentDNSResolverIPv4Address() net.IP {
  1107  	return transparentDNSResolverIPv4Address
  1108  }
  1109  
  1110  // GetTransparentDNSResolverIPv6Address returns the static IPv6 address
  1111  // to use as a DNS resolver when transparent DNS rewriting is desired.
  1112  func GetTransparentDNSResolverIPv6Address() net.IP {
  1113  	return transparentDNSResolverIPv6Address
  1114  }
  1115  
  1116  type session struct {
  1117  	// Note: 64-bit ints used with atomic operations are placed
  1118  	// at the start of struct to ensure 64-bit alignment.
  1119  	// (https://golang.org/pkg/sync/atomic/#pkg-note-BUG)
  1120  	lastActivity             int64
  1121  	lastFlowReapIndex        int64
  1122  	downstreamPackets        unsafe.Pointer
  1123  	checkAllowedTCPPortFunc  unsafe.Pointer
  1124  	checkAllowedUDPPortFunc  unsafe.Pointer
  1125  	checkAllowedDomainFunc   unsafe.Pointer
  1126  	flowActivityUpdaterMaker unsafe.Pointer
  1127  	metricsUpdater           unsafe.Pointer
  1128  	dnsQualityReporter       unsafe.Pointer
  1129  
  1130  	allowBogons              bool
  1131  	metrics                  *packetMetrics
  1132  	sessionID                string
  1133  	index                    int32
  1134  	enableDNSFlowTracking    bool
  1135  	DNSResolverIPv4Addresses []net.IP
  1136  	TCPDNSResolverIPv4Index  int
  1137  	assignedIPv4Address      net.IP
  1138  	setOriginalIPv4Address   int32
  1139  	originalIPv4Address      net.IP
  1140  	DNSResolverIPv6Addresses []net.IP
  1141  	TCPDNSResolverIPv6Index  int
  1142  	assignedIPv6Address      net.IP
  1143  	setOriginalIPv6Address   int32
  1144  	originalIPv6Address      net.IP
  1145  	flows                    sync.Map
  1146  	workers                  *sync.WaitGroup
  1147  	mutex                    sync.Mutex
  1148  	channel                  *Channel
  1149  	runContext               context.Context
  1150  	stopRunning              context.CancelFunc
  1151  }
  1152  
  1153  func (session *session) touch() {
  1154  	atomic.StoreInt64(&session.lastActivity, int64(monotime.Now()))
  1155  }
  1156  
  1157  func (session *session) expired(idleExpiry time.Duration) bool {
  1158  	lastActivity := monotime.Time(atomic.LoadInt64(&session.lastActivity))
  1159  	return monotime.Since(lastActivity) > idleExpiry
  1160  }
  1161  
  1162  func (session *session) setOriginalIPv4AddressIfNotSet(IPAddress net.IP) {
  1163  	if !atomic.CompareAndSwapInt32(&session.setOriginalIPv4Address, 0, 1) {
  1164  		return
  1165  	}
  1166  	// Make a copy of IPAddress; don't reference a slice of a reusable
  1167  	// packet buffer, which will be overwritten.
  1168  	session.originalIPv4Address = net.IP(append([]byte(nil), []byte(IPAddress)...))
  1169  }
  1170  
  1171  func (session *session) getOriginalIPv4Address() net.IP {
  1172  	if atomic.LoadInt32(&session.setOriginalIPv4Address) == 0 {
  1173  		return nil
  1174  	}
  1175  	return session.originalIPv4Address
  1176  }
  1177  
  1178  func (session *session) setOriginalIPv6AddressIfNotSet(IPAddress net.IP) {
  1179  	if !atomic.CompareAndSwapInt32(&session.setOriginalIPv6Address, 0, 1) {
  1180  		return
  1181  	}
  1182  	// Make a copy of IPAddress.
  1183  	session.originalIPv6Address = net.IP(append([]byte(nil), []byte(IPAddress)...))
  1184  }
  1185  
  1186  func (session *session) getOriginalIPv6Address() net.IP {
  1187  	if atomic.LoadInt32(&session.setOriginalIPv6Address) == 0 {
  1188  		return nil
  1189  	}
  1190  	return session.originalIPv6Address
  1191  }
  1192  
  1193  func (session *session) setDownstreamPackets(p *PacketQueue) {
  1194  	atomic.StorePointer(&session.downstreamPackets, unsafe.Pointer(p))
  1195  }
  1196  
  1197  func (session *session) getDownstreamPackets() *PacketQueue {
  1198  	return (*PacketQueue)(atomic.LoadPointer(&session.downstreamPackets))
  1199  }
  1200  
  1201  func (session *session) setCheckAllowedTCPPortFunc(p *AllowedPortChecker) {
  1202  	atomic.StorePointer(&session.checkAllowedTCPPortFunc, unsafe.Pointer(p))
  1203  }
  1204  
  1205  func (session *session) getCheckAllowedTCPPortFunc() AllowedPortChecker {
  1206  	p := (*AllowedPortChecker)(atomic.LoadPointer(&session.checkAllowedTCPPortFunc))
  1207  	if p == nil {
  1208  		return nil
  1209  	}
  1210  	return *p
  1211  }
  1212  
  1213  func (session *session) setCheckAllowedUDPPortFunc(p *AllowedPortChecker) {
  1214  	atomic.StorePointer(&session.checkAllowedUDPPortFunc, unsafe.Pointer(p))
  1215  }
  1216  
  1217  func (session *session) getCheckAllowedUDPPortFunc() AllowedPortChecker {
  1218  	p := (*AllowedPortChecker)(atomic.LoadPointer(&session.checkAllowedUDPPortFunc))
  1219  	if p == nil {
  1220  		return nil
  1221  	}
  1222  	return *p
  1223  }
  1224  
  1225  func (session *session) setCheckAllowedDomainFunc(p *AllowedDomainChecker) {
  1226  	atomic.StorePointer(&session.checkAllowedDomainFunc, unsafe.Pointer(p))
  1227  }
  1228  
  1229  func (session *session) getCheckAllowedDomainFunc() AllowedDomainChecker {
  1230  	p := (*AllowedDomainChecker)(atomic.LoadPointer(&session.checkAllowedDomainFunc))
  1231  	if p == nil {
  1232  		return nil
  1233  	}
  1234  	return *p
  1235  }
  1236  
  1237  func (session *session) setFlowActivityUpdaterMaker(p *FlowActivityUpdaterMaker) {
  1238  	atomic.StorePointer(&session.flowActivityUpdaterMaker, unsafe.Pointer(p))
  1239  }
  1240  
  1241  func (session *session) getFlowActivityUpdaterMaker() FlowActivityUpdaterMaker {
  1242  	p := (*FlowActivityUpdaterMaker)(atomic.LoadPointer(&session.flowActivityUpdaterMaker))
  1243  	if p == nil {
  1244  		return nil
  1245  	}
  1246  	return *p
  1247  }
  1248  
  1249  func (session *session) setMetricsUpdater(p *MetricsUpdater) {
  1250  	atomic.StorePointer(&session.metricsUpdater, unsafe.Pointer(p))
  1251  }
  1252  
  1253  func (session *session) getMetricsUpdater() MetricsUpdater {
  1254  	p := (*MetricsUpdater)(atomic.LoadPointer(&session.metricsUpdater))
  1255  	if p == nil {
  1256  		return nil
  1257  	}
  1258  	return *p
  1259  }
  1260  
  1261  func (session *session) setDNSQualityReporter(p *DNSQualityReporter) {
  1262  	atomic.StorePointer(&session.dnsQualityReporter, unsafe.Pointer(p))
  1263  }
  1264  
  1265  func (session *session) getDNSQualityReporter() DNSQualityReporter {
  1266  	p := (*DNSQualityReporter)(atomic.LoadPointer(&session.dnsQualityReporter))
  1267  	if p == nil {
  1268  		return nil
  1269  	}
  1270  	return *p
  1271  }
  1272  
  1273  // flowID identifies an IP traffic flow using the conventional
  1274  // network 5-tuple. flowIDs track bidirectional flows.
  1275  type flowID struct {
  1276  	downstreamIPAddress [net.IPv6len]byte
  1277  	downstreamPort      uint16
  1278  	upstreamIPAddress   [net.IPv6len]byte
  1279  	upstreamPort        uint16
  1280  	protocol            internetProtocol
  1281  }
  1282  
  1283  // From: https://github.com/golang/go/blob/b88efc7e7ac15f9e0b5d8d9c82f870294f6a3839/src/net/ip.go#L55
  1284  var v4InV6Prefix = []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff}
  1285  
  1286  func (f *flowID) set(
  1287  	downstreamIPAddress net.IP,
  1288  	downstreamPort uint16,
  1289  	upstreamIPAddress net.IP,
  1290  	upstreamPort uint16,
  1291  	protocol internetProtocol) {
  1292  
  1293  	if len(downstreamIPAddress) == net.IPv4len {
  1294  		copy(f.downstreamIPAddress[:], v4InV6Prefix)
  1295  		copy(f.downstreamIPAddress[len(v4InV6Prefix):], downstreamIPAddress)
  1296  	} else { // net.IPv6len
  1297  		copy(f.downstreamIPAddress[:], downstreamIPAddress)
  1298  	}
  1299  	f.downstreamPort = downstreamPort
  1300  
  1301  	if len(upstreamIPAddress) == net.IPv4len {
  1302  		copy(f.upstreamIPAddress[:], v4InV6Prefix)
  1303  		copy(f.upstreamIPAddress[len(v4InV6Prefix):], upstreamIPAddress)
  1304  	} else { // net.IPv6len
  1305  		copy(f.upstreamIPAddress[:], upstreamIPAddress)
  1306  	}
  1307  	f.upstreamPort = upstreamPort
  1308  
  1309  	f.protocol = protocol
  1310  }
  1311  
  1312  type flowState struct {
  1313  	// Note: 64-bit ints used with atomic operations are placed
  1314  	// at the start of struct to ensure 64-bit alignment.
  1315  	// (https://golang.org/pkg/sync/atomic/#pkg-note-BUG)
  1316  	firstUpstreamPacketTime   int64
  1317  	lastUpstreamPacketTime    int64
  1318  	firstDownstreamPacketTime int64
  1319  	lastDownstreamPacketTime  int64
  1320  	isDNS                     bool
  1321  	dnsQualityReporter        DNSQualityReporter
  1322  	activityUpdaters          []FlowActivityUpdater
  1323  }
  1324  
  1325  func (flowState *flowState) expired(idleExpiry time.Duration) bool {
  1326  	now := monotime.Now()
  1327  
  1328  	// Traffic in either direction keeps the flow alive. Initially, only one of
  1329  	// lastUpstreamPacketTime or lastDownstreamPacketTime will be set by
  1330  	// startTrackingFlow, and the other value will be 0 and evaluate as expired.
  1331  
  1332  	return (now.Sub(monotime.Time(atomic.LoadInt64(&flowState.lastUpstreamPacketTime))) > idleExpiry) &&
  1333  		(now.Sub(monotime.Time(atomic.LoadInt64(&flowState.lastDownstreamPacketTime))) > idleExpiry)
  1334  }
  1335  
  1336  // isTrackingFlow checks if a flow is being tracked.
  1337  func (session *session) isTrackingFlow(ID flowID) bool {
  1338  
  1339  	f, ok := session.flows.Load(ID)
  1340  	if !ok {
  1341  		return false
  1342  	}
  1343  	flowState := f.(*flowState)
  1344  
  1345  	// Check if flow is expired but not yet reaped.
  1346  	if flowState.expired(FLOW_IDLE_EXPIRY) {
  1347  		session.deleteFlow(ID, flowState)
  1348  		return false
  1349  	}
  1350  
  1351  	return true
  1352  }
  1353  
  1354  // startTrackingFlow starts flow tracking for the flow identified
  1355  // by ID.
  1356  //
  1357  // Flow tracking is used to implement:
  1358  // - one-time permissions checks for a flow
  1359  // - OSLs
  1360  // - domain bytes transferred [TODO]
  1361  // - DNS quality metrics
  1362  //
  1363  // The applicationData from the first packet in the flow is
  1364  // inspected to determine any associated hostname, using HTTP or
  1365  // TLS payload. The session's FlowActivityUpdaterMaker is invoked
  1366  // to determine a list of updaters to track flow activity.
  1367  //
  1368  // Updaters receive reports with the number of application data
  1369  // bytes in each flow packet. This number, totalled for all packets
  1370  // in a flow, may exceed the total bytes transferred at the
  1371  // application level due to TCP retransmission. Currently, the flow
  1372  // tracking logic doesn't exclude retransmitted packets from update
  1373  // reporting.
  1374  //
  1375  // Flows are untracked after an idle expiry period. Transport
  1376  // protocol indicators of end of flow, such as FIN or RST for TCP,
  1377  // which may or may not appear in a flow, are not currently used.
  1378  //
  1379  // startTrackingFlow may be called from concurrent goroutines; if
  1380  // the flow is already tracked, it is simply updated.
  1381  func (session *session) startTrackingFlow(
  1382  	ID flowID,
  1383  	direction packetDirection,
  1384  	applicationData []byte,
  1385  	isDNS bool) {
  1386  
  1387  	now := int64(monotime.Now())
  1388  
  1389  	// Once every period, iterate over flows and reap expired entries.
  1390  	reapIndex := now / int64(monotime.Time(FLOW_IDLE_EXPIRY/2))
  1391  	previousReapIndex := atomic.LoadInt64(&session.lastFlowReapIndex)
  1392  	if reapIndex != previousReapIndex &&
  1393  		atomic.CompareAndSwapInt64(&session.lastFlowReapIndex, previousReapIndex, reapIndex) {
  1394  		session.reapFlows()
  1395  	}
  1396  
  1397  	var isTCP bool
  1398  	var hostname string
  1399  	if ID.protocol == internetProtocolTCP {
  1400  		// TODO: implement
  1401  		// hostname = common.ExtractHostnameFromTCPFlow(applicationData)
  1402  		isTCP = true
  1403  	}
  1404  
  1405  	var activityUpdaters []FlowActivityUpdater
  1406  
  1407  	// Don't incur activity monitor overhead for DNS requests
  1408  	if !isDNS {
  1409  		flowActivityUpdaterMaker := session.getFlowActivityUpdaterMaker()
  1410  		if flowActivityUpdaterMaker != nil {
  1411  			activityUpdaters = flowActivityUpdaterMaker(
  1412  				isTCP,
  1413  				hostname,
  1414  				net.IP(ID.upstreamIPAddress[:]))
  1415  		}
  1416  	}
  1417  
  1418  	flowState := &flowState{
  1419  		isDNS:              isDNS,
  1420  		activityUpdaters:   activityUpdaters,
  1421  		dnsQualityReporter: session.getDNSQualityReporter(),
  1422  	}
  1423  
  1424  	if direction == packetDirectionServerUpstream {
  1425  		flowState.firstUpstreamPacketTime = now
  1426  		flowState.lastUpstreamPacketTime = now
  1427  	} else {
  1428  		flowState.firstDownstreamPacketTime = now
  1429  		flowState.lastDownstreamPacketTime = now
  1430  	}
  1431  
  1432  	// LoadOrStore will retain any existing entry
  1433  	session.flows.LoadOrStore(ID, flowState)
  1434  
  1435  	session.updateFlow(ID, direction, applicationData)
  1436  }
  1437  
  1438  func (session *session) updateFlow(
  1439  	ID flowID,
  1440  	direction packetDirection,
  1441  	applicationData []byte) {
  1442  
  1443  	f, ok := session.flows.Load(ID)
  1444  	if !ok {
  1445  		return
  1446  	}
  1447  	flowState := f.(*flowState)
  1448  
  1449  	// Note: no expired check here, since caller is assumed to
  1450  	// have just called isTrackingFlow.
  1451  
  1452  	now := int64(monotime.Now())
  1453  	var upstreamBytes, downstreamBytes, durationNanoseconds int64
  1454  
  1455  	if direction == packetDirectionServerUpstream {
  1456  		upstreamBytes = int64(len(applicationData))
  1457  
  1458  		atomic.CompareAndSwapInt64(&flowState.firstUpstreamPacketTime, 0, now)
  1459  
  1460  		atomic.StoreInt64(&flowState.lastUpstreamPacketTime, now)
  1461  
  1462  	} else {
  1463  		downstreamBytes = int64(len(applicationData))
  1464  
  1465  		atomic.CompareAndSwapInt64(&flowState.firstDownstreamPacketTime, 0, now)
  1466  
  1467  		// Follows common.ActivityMonitoredConn semantics, where
  1468  		// duration is updated only for downstream activity. This
  1469  		// is intened to produce equivalent behaviour for port
  1470  		// forward clients (tracked with ActivityUpdaters) and
  1471  		// packet tunnel clients (tracked with FlowActivityUpdaters).
  1472  
  1473  		durationNanoseconds = now - atomic.SwapInt64(&flowState.lastDownstreamPacketTime, now)
  1474  	}
  1475  
  1476  	for _, updater := range flowState.activityUpdaters {
  1477  		updater.UpdateProgress(downstreamBytes, upstreamBytes, durationNanoseconds)
  1478  	}
  1479  }
  1480  
  1481  // deleteFlow stops tracking a flow and logs any outstanding metrics.
  1482  // flowState is passed in to avoid duplicating the lookup that all callers
  1483  // have already performed.
  1484  func (session *session) deleteFlow(ID flowID, flowState *flowState) {
  1485  
  1486  	if flowState.isDNS {
  1487  
  1488  		dnsStartTime := monotime.Time(
  1489  			atomic.LoadInt64(&flowState.firstUpstreamPacketTime))
  1490  
  1491  		if dnsStartTime > 0 {
  1492  
  1493  			// Record DNS quality metrics using a heuristic: if a packet was sent and
  1494  			// then a packet was received, assume the DNS request successfully received
  1495  			// a valid response; failure occurs when the resolver fails to provide a
  1496  			// response; a "no such host" response is still a success. Limitations: we
  1497  			// assume a resolver will not respond when, e.g., rate limiting; we ignore
  1498  			// subsequent requests made via the same UDP/TCP flow; deleteFlow may be
  1499  			// called only after the flow has expired, which adds some delay to the
  1500  			// recording of the DNS metric.
  1501  
  1502  			dnsEndTime := monotime.Time(
  1503  				atomic.LoadInt64(&flowState.firstDownstreamPacketTime))
  1504  
  1505  			dnsSuccess := true
  1506  			if dnsEndTime == 0 {
  1507  				dnsSuccess = false
  1508  				dnsEndTime = monotime.Now()
  1509  			}
  1510  
  1511  			resolveElapsedTime := dnsEndTime.Sub(dnsStartTime)
  1512  
  1513  			if flowState.dnsQualityReporter != nil {
  1514  				flowState.dnsQualityReporter(
  1515  					dnsSuccess,
  1516  					resolveElapsedTime,
  1517  					net.IP(ID.upstreamIPAddress[:]))
  1518  			}
  1519  		}
  1520  	}
  1521  
  1522  	session.flows.Delete(ID)
  1523  }
  1524  
  1525  // reapFlows removes expired idle flows.
  1526  func (session *session) reapFlows() {
  1527  	session.flows.Range(func(key, value interface{}) bool {
  1528  		flowState := value.(*flowState)
  1529  		if flowState.expired(FLOW_IDLE_EXPIRY) {
  1530  			session.deleteFlow(key.(flowID), flowState)
  1531  		}
  1532  		return true
  1533  	})
  1534  }
  1535  
  1536  // deleteFlows deletes all flows.
  1537  func (session *session) deleteFlows() {
  1538  	session.flows.Range(func(key, value interface{}) bool {
  1539  		session.deleteFlow(key.(flowID), value.(*flowState))
  1540  		return true
  1541  	})
  1542  }
  1543  
  1544  type packetMetrics struct {
  1545  	upstreamRejectReasons   [packetRejectReasonCount]int64
  1546  	downstreamRejectReasons [packetRejectReasonCount]int64
  1547  	TCPIPv4                 relayedPacketMetrics
  1548  	TCPIPv6                 relayedPacketMetrics
  1549  	UDPIPv4                 relayedPacketMetrics
  1550  	UDPIPv6                 relayedPacketMetrics
  1551  }
  1552  
  1553  type relayedPacketMetrics struct {
  1554  	packetsUp            int64
  1555  	packetsDown          int64
  1556  	bytesUp              int64
  1557  	bytesDown            int64
  1558  	applicationBytesUp   int64
  1559  	applicationBytesDown int64
  1560  }
  1561  
  1562  func (metrics *packetMetrics) rejectedPacket(
  1563  	direction packetDirection,
  1564  	reason packetRejectReason) {
  1565  
  1566  	if direction == packetDirectionServerUpstream ||
  1567  		direction == packetDirectionClientUpstream {
  1568  
  1569  		atomic.AddInt64(&metrics.upstreamRejectReasons[reason], 1)
  1570  
  1571  	} else { // packetDirectionDownstream
  1572  
  1573  		atomic.AddInt64(&metrics.downstreamRejectReasons[reason], 1)
  1574  
  1575  	}
  1576  }
  1577  
  1578  func (metrics *packetMetrics) relayedPacket(
  1579  	direction packetDirection,
  1580  	version int,
  1581  	protocol internetProtocol,
  1582  	packetLength, applicationDataLength int) {
  1583  
  1584  	var packetsMetric, bytesMetric, applicationBytesMetric *int64
  1585  
  1586  	if direction == packetDirectionServerUpstream ||
  1587  		direction == packetDirectionClientUpstream {
  1588  
  1589  		if version == 4 {
  1590  
  1591  			if protocol == internetProtocolTCP {
  1592  				packetsMetric = &metrics.TCPIPv4.packetsUp
  1593  				bytesMetric = &metrics.TCPIPv4.bytesUp
  1594  				applicationBytesMetric = &metrics.TCPIPv4.applicationBytesUp
  1595  			} else { // UDP
  1596  				packetsMetric = &metrics.UDPIPv4.packetsUp
  1597  				bytesMetric = &metrics.UDPIPv4.bytesUp
  1598  				applicationBytesMetric = &metrics.UDPIPv4.applicationBytesUp
  1599  			}
  1600  
  1601  		} else { // IPv6
  1602  
  1603  			if protocol == internetProtocolTCP {
  1604  				packetsMetric = &metrics.TCPIPv6.packetsUp
  1605  				bytesMetric = &metrics.TCPIPv6.bytesUp
  1606  				applicationBytesMetric = &metrics.TCPIPv6.applicationBytesUp
  1607  			} else { // UDP
  1608  				packetsMetric = &metrics.UDPIPv6.packetsUp
  1609  				bytesMetric = &metrics.UDPIPv6.bytesUp
  1610  				applicationBytesMetric = &metrics.UDPIPv6.applicationBytesUp
  1611  			}
  1612  		}
  1613  
  1614  	} else { // packetDirectionDownstream
  1615  
  1616  		if version == 4 {
  1617  
  1618  			if protocol == internetProtocolTCP {
  1619  				packetsMetric = &metrics.TCPIPv4.packetsDown
  1620  				bytesMetric = &metrics.TCPIPv4.bytesDown
  1621  				applicationBytesMetric = &metrics.TCPIPv4.applicationBytesDown
  1622  			} else { // UDP
  1623  				packetsMetric = &metrics.UDPIPv4.packetsDown
  1624  				bytesMetric = &metrics.UDPIPv4.bytesDown
  1625  				applicationBytesMetric = &metrics.UDPIPv4.applicationBytesDown
  1626  			}
  1627  
  1628  		} else { // IPv6
  1629  
  1630  			if protocol == internetProtocolTCP {
  1631  				packetsMetric = &metrics.TCPIPv6.packetsDown
  1632  				bytesMetric = &metrics.TCPIPv6.bytesDown
  1633  				applicationBytesMetric = &metrics.TCPIPv6.applicationBytesDown
  1634  			} else { // UDP
  1635  				packetsMetric = &metrics.UDPIPv6.packetsDown
  1636  				bytesMetric = &metrics.UDPIPv6.bytesDown
  1637  				applicationBytesMetric = &metrics.UDPIPv6.applicationBytesDown
  1638  			}
  1639  		}
  1640  	}
  1641  
  1642  	atomic.AddInt64(packetsMetric, 1)
  1643  	atomic.AddInt64(bytesMetric, int64(packetLength))
  1644  	atomic.AddInt64(applicationBytesMetric, int64(applicationDataLength))
  1645  }
  1646  
  1647  const (
  1648  	packetMetricsRejected = 1
  1649  	packetMetricsRelayed  = 2
  1650  	packetMetricsAll      = packetMetricsRejected | packetMetricsRelayed
  1651  )
  1652  
  1653  func (metrics *packetMetrics) checkpoint(
  1654  	logger common.Logger, updater MetricsUpdater, logName string, whichMetrics int) {
  1655  
  1656  	// Report all metric counters in a single log message. Each
  1657  	// counter is reset to 0 when added to the log.
  1658  
  1659  	logFields := make(common.LogFields)
  1660  
  1661  	if whichMetrics&packetMetricsRejected != 0 {
  1662  
  1663  		for i := 0; i < packetRejectReasonCount; i++ {
  1664  			logFields["upstream_packet_rejected_"+packetRejectReasonDescription(packetRejectReason(i))] =
  1665  				atomic.SwapInt64(&metrics.upstreamRejectReasons[i], 0)
  1666  			logFields["downstream_packet_rejected_"+packetRejectReasonDescription(packetRejectReason(i))] =
  1667  				atomic.SwapInt64(&metrics.downstreamRejectReasons[i], 0)
  1668  		}
  1669  	}
  1670  
  1671  	if whichMetrics&packetMetricsRelayed != 0 {
  1672  
  1673  		var TCPApplicationBytesUp, TCPApplicationBytesDown,
  1674  			UDPApplicationBytesUp, UDPApplicationBytesDown int64
  1675  
  1676  		relayedMetrics := []struct {
  1677  			prefix           string
  1678  			metrics          *relayedPacketMetrics
  1679  			updaterBytesUp   *int64
  1680  			updaterBytesDown *int64
  1681  		}{
  1682  			{"tcp_ipv4_", &metrics.TCPIPv4, &TCPApplicationBytesUp, &TCPApplicationBytesDown},
  1683  			{"tcp_ipv6_", &metrics.TCPIPv6, &TCPApplicationBytesUp, &TCPApplicationBytesDown},
  1684  			{"udp_ipv4_", &metrics.UDPIPv4, &UDPApplicationBytesUp, &UDPApplicationBytesDown},
  1685  			{"udp_ipv6_", &metrics.UDPIPv6, &UDPApplicationBytesUp, &UDPApplicationBytesDown},
  1686  		}
  1687  
  1688  		for _, r := range relayedMetrics {
  1689  
  1690  			applicationBytesUp := atomic.SwapInt64(&r.metrics.applicationBytesUp, 0)
  1691  			applicationBytesDown := atomic.SwapInt64(&r.metrics.applicationBytesDown, 0)
  1692  
  1693  			*r.updaterBytesUp += applicationBytesUp
  1694  			*r.updaterBytesDown += applicationBytesDown
  1695  
  1696  			logFields[r.prefix+"packets_up"] = atomic.SwapInt64(&r.metrics.packetsUp, 0)
  1697  			logFields[r.prefix+"packets_down"] = atomic.SwapInt64(&r.metrics.packetsDown, 0)
  1698  			logFields[r.prefix+"bytes_up"] = atomic.SwapInt64(&r.metrics.bytesUp, 0)
  1699  			logFields[r.prefix+"bytes_down"] = atomic.SwapInt64(&r.metrics.bytesDown, 0)
  1700  			logFields[r.prefix+"application_bytes_up"] = applicationBytesUp
  1701  			logFields[r.prefix+"application_bytes_down"] = applicationBytesDown
  1702  		}
  1703  
  1704  		if updater != nil {
  1705  			updater(
  1706  				TCPApplicationBytesUp, TCPApplicationBytesDown,
  1707  				UDPApplicationBytesUp, UDPApplicationBytesDown)
  1708  		}
  1709  	}
  1710  
  1711  	logger.LogMetric(logName, logFields)
  1712  }
  1713  
  1714  // PacketQueue is a fixed-size, preallocated queue of packets.
  1715  // Enqueued packets are packed into a contiguous buffer with channel
  1716  // framing, allowing the entire queue to be written to a channel
  1717  // in a single call.
  1718  // Reuse of the queue buffers avoids GC churn. To avoid memory use
  1719  // spikes when many clients connect and may disconnect before relaying
  1720  // packets, the packet queue buffers start small and grow when required,
  1721  // up to the maximum size, and then remain static.
  1722  type PacketQueue struct {
  1723  	maxSize      int
  1724  	emptyBuffers chan []byte
  1725  	activeBuffer chan []byte
  1726  }
  1727  
  1728  // NewPacketQueue creates a new PacketQueue.
  1729  // The caller must ensure that maxSize exceeds the
  1730  // packet MTU, or packets will will never enqueue.
  1731  func NewPacketQueue(maxSize int) *PacketQueue {
  1732  
  1733  	// Two buffers of size up to maxSize are allocated, to
  1734  	// allow packets to continue to enqueue while one buffer
  1735  	// is borrowed by the DequeueFramedPackets caller.
  1736  	//
  1737  	// TODO: is there a way to implement this without
  1738  	// allocating up to 2x maxSize bytes? A circular queue
  1739  	// won't work because we want DequeueFramedPackets
  1740  	// to return a contiguous buffer. Perhaps a Bip
  1741  	// Buffer would work here:
  1742  	// https://www.codeproject.com/Articles/3479/The-Bip-Buffer-The-Circular-Buffer-with-a-Twist
  1743  
  1744  	queue := &PacketQueue{
  1745  		maxSize:      maxSize,
  1746  		emptyBuffers: make(chan []byte, 2),
  1747  		activeBuffer: make(chan []byte, 1),
  1748  	}
  1749  
  1750  	queue.emptyBuffers <- make([]byte, 0)
  1751  	queue.emptyBuffers <- make([]byte, 0)
  1752  
  1753  	return queue
  1754  }
  1755  
  1756  // Enqueue adds a packet to the queue.
  1757  // If the queue is full, the packet is dropped.
  1758  // Enqueue is _not_ safe for concurrent calls.
  1759  func (queue *PacketQueue) Enqueue(packet []byte) {
  1760  
  1761  	var buffer []byte
  1762  
  1763  	select {
  1764  	case buffer = <-queue.activeBuffer:
  1765  	default:
  1766  		buffer = <-queue.emptyBuffers
  1767  	}
  1768  
  1769  	packetSize := len(packet)
  1770  
  1771  	if queue.maxSize-len(buffer) >= channelHeaderSize+packetSize {
  1772  		// Assumes len(packet)/MTU <= 64K
  1773  		var channelHeader [channelHeaderSize]byte
  1774  		binary.BigEndian.PutUint16(channelHeader[:], uint16(packetSize))
  1775  
  1776  		// Once the buffer has reached maxSize capacity
  1777  		// and been replaced (buffer = buffer[0:0]), these
  1778  		// appends should no longer allocate new memory and
  1779  		// should just copy to preallocated memory.
  1780  
  1781  		buffer = append(buffer, channelHeader[:]...)
  1782  		buffer = append(buffer, packet...)
  1783  	}
  1784  	// Else, queue is full, so drop packet.
  1785  
  1786  	queue.activeBuffer <- buffer
  1787  }
  1788  
  1789  // DequeueFramedPackets waits until at least one packet is
  1790  // enqueued, and then returns a packet buffer containing one
  1791  // or more framed packets. The returned buffer remains part
  1792  // of the PacketQueue structure and the caller _must_ replace
  1793  // the buffer by calling Replace.
  1794  // DequeueFramedPackets unblocks and returns false if it receives
  1795  // runContext.Done().
  1796  // DequeueFramedPackets is _not_ safe for concurrent calls.
  1797  func (queue *PacketQueue) DequeueFramedPackets(
  1798  	runContext context.Context) ([]byte, bool) {
  1799  
  1800  	var buffer []byte
  1801  
  1802  	select {
  1803  	case buffer = <-queue.activeBuffer:
  1804  	case <-runContext.Done():
  1805  		return nil, false
  1806  	}
  1807  
  1808  	return buffer, true
  1809  }
  1810  
  1811  // Replace returns the buffer to the PacketQueue to be
  1812  // reused.
  1813  // The input must be a return value from DequeueFramedPackets.
  1814  func (queue *PacketQueue) Replace(buffer []byte) {
  1815  
  1816  	buffer = buffer[0:0]
  1817  
  1818  	// This won't block (as long as it is a DequeueFramedPackets return value).
  1819  	queue.emptyBuffers <- buffer
  1820  }
  1821  
  1822  // ClientConfig specifies the configuration of a packet tunnel client.
  1823  type ClientConfig struct {
  1824  
  1825  	// Logger is used for logging events and metrics.
  1826  	Logger common.Logger
  1827  
  1828  	// SudoNetworkConfigCommands specifies whether to use "sudo"
  1829  	// when executing network configuration commands. See description
  1830  	// for ServerConfig.SudoNetworkConfigCommands.
  1831  	SudoNetworkConfigCommands bool
  1832  
  1833  	// AllowNoIPv6NetworkConfiguration indicates that failures while
  1834  	// configuring tun interfaces and routing for IPv6 are to be
  1835  	// logged as warnings only. See description for
  1836  	// ServerConfig.AllowNoIPv6NetworkConfiguration.
  1837  	AllowNoIPv6NetworkConfiguration bool
  1838  
  1839  	// MTU is the packet MTU value to use; this value
  1840  	// should be obtained from the packet tunnel server.
  1841  	// When MTU is 0, a default value is used.
  1842  	MTU int
  1843  
  1844  	// UpstreamPacketQueueSize specifies the size of the upstream
  1845  	// packet queue.
  1846  	// When UpstreamPacketQueueSize is 0, a default value tuned for
  1847  	// Psiphon is used.
  1848  	UpstreamPacketQueueSize int
  1849  
  1850  	// Transport is an established transport channel that
  1851  	// will be used to relay packets to and from a packet
  1852  	// tunnel server.
  1853  	Transport io.ReadWriteCloser
  1854  
  1855  	// TunFileDescriptor specifies a file descriptor to use to
  1856  	// read and write packets to be relayed to the client. When
  1857  	// TunFileDescriptor is specified, the Client will use this
  1858  	// existing tun device and not create its own; in this case,
  1859  	// network address and routing configuration is not performed
  1860  	// by the Client. As the packet tunnel server performs
  1861  	// transparent source IP address and DNS rewriting, the tun
  1862  	// device may have any assigned IP address, but should be
  1863  	// configured with the given MTU; and DNS should be configured
  1864  	// to use the transparent DNS target resolver addresses.
  1865  	// Set TunFileDescriptor to <= 0 to ignore this parameter
  1866  	// and create and configure a tun device.
  1867  	TunFileDescriptor int
  1868  
  1869  	// IPv4AddressCIDR is the IPv4 address and netmask to
  1870  	// assign to a newly created tun device.
  1871  	IPv4AddressCIDR string
  1872  
  1873  	// IPv6AddressCIDR is the IPv6 address and prefix to
  1874  	// assign to a newly created tun device.
  1875  	IPv6AddressCIDR string
  1876  
  1877  	// RouteDestinations are hosts (IPs) or networks (CIDRs)
  1878  	// to be configured to be routed through a newly
  1879  	// created tun device.
  1880  	RouteDestinations []string
  1881  }
  1882  
  1883  // Client is a packet tunnel client. A packet tunnel client
  1884  // relays packets between a local tun device and a packet
  1885  // tunnel server via a transport channel.
  1886  type Client struct {
  1887  	config          *ClientConfig
  1888  	device          *Device
  1889  	channel         *Channel
  1890  	upstreamPackets *PacketQueue
  1891  	metrics         *packetMetrics
  1892  	runContext      context.Context
  1893  	stopRunning     context.CancelFunc
  1894  	workers         *sync.WaitGroup
  1895  }
  1896  
  1897  // NewClient initializes a new Client. Unless using the
  1898  // TunFileDescriptor configuration parameter, a new tun
  1899  // device is created for the client.
  1900  func NewClient(config *ClientConfig) (*Client, error) {
  1901  
  1902  	var device *Device
  1903  	var err error
  1904  
  1905  	if config.TunFileDescriptor > 0 {
  1906  		device, err = NewClientDeviceFromFD(config)
  1907  	} else {
  1908  		device, err = NewClientDevice(config)
  1909  	}
  1910  
  1911  	if err != nil {
  1912  		return nil, errors.Trace(err)
  1913  	}
  1914  
  1915  	upstreamPacketQueueSize := DEFAULT_UPSTREAM_PACKET_QUEUE_SIZE
  1916  	if config.UpstreamPacketQueueSize > 0 {
  1917  		upstreamPacketQueueSize = config.UpstreamPacketQueueSize
  1918  	}
  1919  
  1920  	runContext, stopRunning := context.WithCancel(context.Background())
  1921  
  1922  	return &Client{
  1923  		config:          config,
  1924  		device:          device,
  1925  		channel:         NewChannel(config.Transport, getMTU(config.MTU)),
  1926  		upstreamPackets: NewPacketQueue(upstreamPacketQueueSize),
  1927  		metrics:         new(packetMetrics),
  1928  		runContext:      runContext,
  1929  		stopRunning:     stopRunning,
  1930  		workers:         new(sync.WaitGroup),
  1931  	}, nil
  1932  }
  1933  
  1934  // Start starts a client and returns with it running.
  1935  func (client *Client) Start() {
  1936  
  1937  	client.config.Logger.WithTrace().Info("starting")
  1938  
  1939  	client.workers.Add(1)
  1940  	go func() {
  1941  		defer client.workers.Done()
  1942  		for {
  1943  			readPacket, err := client.device.ReadPacket()
  1944  
  1945  			select {
  1946  			case <-client.runContext.Done():
  1947  				// No error is logged as shutdown may have interrupted read.
  1948  				return
  1949  			default:
  1950  			}
  1951  
  1952  			if err != nil {
  1953  				client.config.Logger.WithTraceFields(
  1954  					common.LogFields{"error": err}).Info("read device packet failed")
  1955  				// May be temporary error condition, keep working.
  1956  				continue
  1957  			}
  1958  
  1959  			// processPacket will check for packets the server will reject
  1960  			// and drop those without sending.
  1961  
  1962  			// Limitation: packet metrics, including successful relay count,
  1963  			// are incremented _before_ the packet is written to the channel.
  1964  
  1965  			if !processPacket(
  1966  				client.metrics,
  1967  				nil,
  1968  				packetDirectionClientUpstream,
  1969  				readPacket) {
  1970  				continue
  1971  			}
  1972  
  1973  			// Instead of immediately writing to the channel, the
  1974  			// packet is enqueued, which has the effect of batching
  1975  			// up IP packets into a single channel packet (for Psiphon,
  1976  			// an SSH packet) to minimize overhead and, as benchmarked,
  1977  			// improve throughput.
  1978  			// Packet will be dropped if queue is full.
  1979  
  1980  			client.upstreamPackets.Enqueue(readPacket)
  1981  		}
  1982  	}()
  1983  
  1984  	client.workers.Add(1)
  1985  	go func() {
  1986  		defer client.workers.Done()
  1987  		for {
  1988  			packetBuffer, ok := client.upstreamPackets.DequeueFramedPackets(client.runContext)
  1989  			if !ok {
  1990  				// Dequeue aborted due to session.runContext.Done()
  1991  				return
  1992  			}
  1993  
  1994  			err := client.channel.WriteFramedPackets(packetBuffer)
  1995  
  1996  			client.upstreamPackets.Replace(packetBuffer)
  1997  
  1998  			if err != nil {
  1999  				client.config.Logger.WithTraceFields(
  2000  					common.LogFields{"error": err}).Info("write channel packets failed")
  2001  				// May be temporary error condition, such as reconnecting the tunnel;
  2002  				// keep working. The packets are most likely dropped.
  2003  				continue
  2004  			}
  2005  		}
  2006  	}()
  2007  
  2008  	client.workers.Add(1)
  2009  	go func() {
  2010  		defer client.workers.Done()
  2011  		for {
  2012  			readPacket, err := client.channel.ReadPacket()
  2013  
  2014  			select {
  2015  			case <-client.runContext.Done():
  2016  				// No error is logged as shutdown may have interrupted read.
  2017  				return
  2018  			default:
  2019  			}
  2020  
  2021  			if err != nil {
  2022  				client.config.Logger.WithTraceFields(
  2023  					common.LogFields{"error": err}).Info("read channel packet failed")
  2024  				// May be temporary error condition, such as reconnecting the tunnel;
  2025  				// keep working.
  2026  				continue
  2027  			}
  2028  
  2029  			if !processPacket(
  2030  				client.metrics,
  2031  				nil,
  2032  				packetDirectionClientDownstream,
  2033  				readPacket) {
  2034  				continue
  2035  			}
  2036  
  2037  			err = client.device.WritePacket(readPacket)
  2038  
  2039  			if err != nil {
  2040  				client.config.Logger.WithTraceFields(
  2041  					common.LogFields{"error": err}).Info("write device packet failed")
  2042  				// May be temporary error condition, keep working. The packet is
  2043  				// most likely dropped.
  2044  				continue
  2045  			}
  2046  		}
  2047  	}()
  2048  }
  2049  
  2050  // Stop halts a running client.
  2051  func (client *Client) Stop() {
  2052  
  2053  	client.config.Logger.WithTrace().Info("stopping")
  2054  
  2055  	client.stopRunning()
  2056  	client.device.Close()
  2057  	client.channel.Close()
  2058  
  2059  	client.workers.Wait()
  2060  
  2061  	client.metrics.checkpoint(
  2062  		client.config.Logger, nil, "packet_metrics", packetMetricsAll)
  2063  
  2064  	client.config.Logger.WithTrace().Info("stopped")
  2065  }
  2066  
  2067  /*
  2068     Packet offset constants in getPacketDestinationIPAddress and
  2069     processPacket are from the following RFC definitions.
  2070  
  2071  
  2072     IPv4 header: https://tools.ietf.org/html/rfc791
  2073  
  2074      0                   1                   2                   3
  2075      0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
  2076     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  2077     |Version|  IHL  |Type of Service|          Total Length         |
  2078     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  2079     |         Identification        |Flags|      Fragment Offset    |
  2080     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  2081     |  Time to Live |    Protocol   |         Header Checksum       |
  2082     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  2083     |                       Source Address                          |
  2084     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  2085     |                    Destination Address                        |
  2086     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  2087     |                    Options                    |    Padding    |
  2088     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  2089  
  2090     IPv6 header: https://tools.ietf.org/html/rfc2460
  2091  
  2092      0                   1                   2                   3
  2093      0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
  2094     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  2095     |Version| Traffic Class |           Flow Label                  |
  2096     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  2097     |         Payload Length        |  Next Header  |   Hop Limit   |
  2098     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  2099     |                                                               |
  2100     +                                                               +
  2101     |                                                               |
  2102     +                         Source Address                        +
  2103     |                                                               |
  2104     +                                                               +
  2105     |                                                               |
  2106     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  2107     |                                                               |
  2108     +                                                               +
  2109     |                                                               |
  2110     +                      Destination Address                      +
  2111     |                                                               |
  2112     +                                                               +
  2113     |                                                               |
  2114     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  2115  
  2116     TCP header: https://tools.ietf.org/html/rfc793
  2117  
  2118      0                   1                   2                   3
  2119      0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
  2120     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  2121     |          Source Port          |       Destination Port        |
  2122     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  2123     |                        Sequence Number                        |
  2124     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  2125     |                    Acknowledgment Number                      |
  2126     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  2127     |  Data |           |U|A|P|R|S|F|                               |
  2128     | Offset| Reserved  |R|C|S|S|Y|I|            Window             |
  2129     |       |           |G|K|H|T|N|N|                               |
  2130     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  2131     |           Checksum            |         Urgent Pointer        |
  2132     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  2133     |                    Options                    |    Padding    |
  2134     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  2135     |                             data                              |
  2136     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  2137  
  2138     UDP header: https://tools.ietf.org/html/rfc768
  2139  
  2140                    0      7 8     15 16    23 24    31
  2141                   +--------+--------+--------+--------+
  2142                   |     Source      |   Destination   |
  2143                   |      Port       |      Port       |
  2144                   +--------+--------+--------+--------+
  2145                   |                 |                 |
  2146                   |     Length      |    Checksum     |
  2147                   +--------+--------+--------+--------+
  2148                   |
  2149                   |          data octets ...
  2150                   +---------------- ...
  2151  */
  2152  
  2153  const (
  2154  	packetDirectionServerUpstream   = 0
  2155  	packetDirectionServerDownstream = 1
  2156  	packetDirectionClientUpstream   = 2
  2157  	packetDirectionClientDownstream = 3
  2158  
  2159  	internetProtocolTCP = 6
  2160  	internetProtocolUDP = 17
  2161  
  2162  	portNumberDNS = 53
  2163  
  2164  	packetRejectNoSession          = 0
  2165  	packetRejectDestinationAddress = 1
  2166  	packetRejectLength             = 2
  2167  	packetRejectVersion            = 3
  2168  	packetRejectOptions            = 4
  2169  	packetRejectProtocol           = 5
  2170  	packetRejectTCPProtocolLength  = 6
  2171  	packetRejectUDPProtocolLength  = 7
  2172  	packetRejectTCPPort            = 8
  2173  	packetRejectUDPPort            = 9
  2174  	packetRejectNoOriginalAddress  = 10
  2175  	packetRejectNoDNSResolvers     = 11
  2176  	packetRejectInvalidDNSMessage  = 12
  2177  	packetRejectDisallowedDomain   = 13
  2178  	packetRejectNoClient           = 14
  2179  	packetRejectReasonCount        = 15
  2180  	packetOk                       = 15
  2181  )
  2182  
  2183  type packetDirection int
  2184  type internetProtocol int
  2185  type packetRejectReason int
  2186  
  2187  func packetRejectReasonDescription(reason packetRejectReason) string {
  2188  
  2189  	// Description strings follow the metrics naming
  2190  	// convention: all lowercase; underscore seperators.
  2191  
  2192  	switch reason {
  2193  	case packetRejectNoSession:
  2194  		return "no_session"
  2195  	case packetRejectDestinationAddress:
  2196  		return "invalid_destination_address"
  2197  	case packetRejectLength:
  2198  		return "invalid_ip_packet_length"
  2199  	case packetRejectVersion:
  2200  		return "invalid_ip_header_version"
  2201  	case packetRejectOptions:
  2202  		return "invalid_ip_header_options"
  2203  	case packetRejectProtocol:
  2204  		return "invalid_ip_header_protocol"
  2205  	case packetRejectTCPProtocolLength:
  2206  		return "invalid_tcp_packet_length"
  2207  	case packetRejectUDPProtocolLength:
  2208  		return "invalid_tcp_packet_length"
  2209  	case packetRejectTCPPort:
  2210  		return "disallowed_tcp_destination_port"
  2211  	case packetRejectUDPPort:
  2212  		return "disallowed_udp_destination_port"
  2213  	case packetRejectNoOriginalAddress:
  2214  		return "no_original_address"
  2215  	case packetRejectNoDNSResolvers:
  2216  		return "no_dns_resolvers"
  2217  	case packetRejectInvalidDNSMessage:
  2218  		return "invalid_dns_message"
  2219  	case packetRejectDisallowedDomain:
  2220  		return "disallowed_domain"
  2221  	case packetRejectNoClient:
  2222  		return "no_client"
  2223  	}
  2224  
  2225  	return "unknown_reason"
  2226  }
  2227  
  2228  // Caller: the destination IP address return value is
  2229  // a slice of the packet input value and only valid while
  2230  // the packet buffer remains valid.
  2231  func getPacketDestinationIPAddress(
  2232  	metrics *packetMetrics,
  2233  	direction packetDirection,
  2234  	packet []byte) (net.IP, bool) {
  2235  
  2236  	// TODO: this function duplicates a subset of the packet
  2237  	// parsing code in processPacket. Refactor to reuse code;
  2238  	// also, both getPacketDestinationIPAddress and processPacket
  2239  	// are called for some packets; refactor to only parse once.
  2240  
  2241  	if len(packet) < 1 {
  2242  		metrics.rejectedPacket(direction, packetRejectLength)
  2243  		return nil, false
  2244  	}
  2245  
  2246  	version := packet[0] >> 4
  2247  
  2248  	if version != 4 && version != 6 {
  2249  		metrics.rejectedPacket(direction, packetRejectVersion)
  2250  		return nil, false
  2251  	}
  2252  
  2253  	if version == 4 {
  2254  		if len(packet) < 20 {
  2255  			metrics.rejectedPacket(direction, packetRejectLength)
  2256  			return nil, false
  2257  		}
  2258  
  2259  		return packet[16:20], true
  2260  
  2261  	} else { // IPv6
  2262  		if len(packet) < 40 {
  2263  			metrics.rejectedPacket(direction, packetRejectLength)
  2264  			return nil, false
  2265  		}
  2266  
  2267  		return packet[24:40], true
  2268  	}
  2269  }
  2270  
  2271  // processPacket parses IP packets, applies relaying rules,
  2272  // and rewrites packet elements as required. processPacket
  2273  // returns true if a packet parses correctly, is accepted
  2274  // by the relay rules, and is successfully rewritten.
  2275  //
  2276  // When a packet is rejected, processPacket returns false
  2277  // and updates a reason in the supplied metrics.
  2278  //
  2279  // Rejection may result in partially rewritten packets.
  2280  func processPacket(
  2281  	metrics *packetMetrics,
  2282  	session *session,
  2283  	direction packetDirection,
  2284  	packet []byte) bool {
  2285  
  2286  	// Parse and validate IP packet structure
  2287  
  2288  	// Must have an IP version field.
  2289  	if len(packet) < 1 {
  2290  		metrics.rejectedPacket(direction, packetRejectLength)
  2291  		return false
  2292  	}
  2293  
  2294  	version := packet[0] >> 4
  2295  
  2296  	// Must be IPv4 or IPv6.
  2297  	if version != 4 && version != 6 {
  2298  		metrics.rejectedPacket(direction, packetRejectVersion)
  2299  		return false
  2300  	}
  2301  
  2302  	var protocol internetProtocol
  2303  	var sourceIPAddress, destinationIPAddress net.IP
  2304  	var sourcePort, destinationPort uint16
  2305  	var IPChecksum, TCPChecksum, UDPChecksum []byte
  2306  	var applicationData []byte
  2307  
  2308  	if version == 4 {
  2309  
  2310  		// IHL must be 5: options are not supported; a fixed
  2311  		// 20 byte header is expected.
  2312  
  2313  		headerLength := packet[0] & 0x0F
  2314  
  2315  		if headerLength != 5 {
  2316  			metrics.rejectedPacket(direction, packetRejectOptions)
  2317  			return false
  2318  		}
  2319  
  2320  		if len(packet) < 20 {
  2321  			metrics.rejectedPacket(direction, packetRejectLength)
  2322  			return false
  2323  		}
  2324  
  2325  		// Protocol must be TCP or UDP.
  2326  
  2327  		protocol = internetProtocol(packet[9])
  2328  		dataOffset := 0
  2329  
  2330  		if protocol == internetProtocolTCP {
  2331  			if len(packet) < 33 {
  2332  				metrics.rejectedPacket(direction, packetRejectTCPProtocolLength)
  2333  				return false
  2334  			}
  2335  			dataOffset = 20 + 4*int(packet[32]>>4)
  2336  			if len(packet) < dataOffset {
  2337  				metrics.rejectedPacket(direction, packetRejectTCPProtocolLength)
  2338  				return false
  2339  			}
  2340  		} else if protocol == internetProtocolUDP {
  2341  			dataOffset = 28
  2342  			if len(packet) < dataOffset {
  2343  				metrics.rejectedPacket(direction, packetRejectUDPProtocolLength)
  2344  				return false
  2345  			}
  2346  		} else {
  2347  			metrics.rejectedPacket(direction, packetRejectProtocol)
  2348  			return false
  2349  		}
  2350  
  2351  		applicationData = packet[dataOffset:]
  2352  
  2353  		// Slices reference packet bytes to be rewritten.
  2354  
  2355  		sourceIPAddress = packet[12:16]
  2356  		destinationIPAddress = packet[16:20]
  2357  		IPChecksum = packet[10:12]
  2358  
  2359  		// Port numbers have the same offset in TCP and UDP.
  2360  
  2361  		sourcePort = binary.BigEndian.Uint16(packet[20:22])
  2362  		destinationPort = binary.BigEndian.Uint16(packet[22:24])
  2363  
  2364  		if protocol == internetProtocolTCP {
  2365  			TCPChecksum = packet[36:38]
  2366  		} else { // UDP
  2367  			UDPChecksum = packet[26:28]
  2368  		}
  2369  
  2370  	} else { // IPv6
  2371  
  2372  		if len(packet) < 40 {
  2373  			metrics.rejectedPacket(direction, packetRejectLength)
  2374  			return false
  2375  		}
  2376  
  2377  		// Next Header must be TCP or UDP.
  2378  
  2379  		nextHeader := packet[6]
  2380  
  2381  		protocol = internetProtocol(nextHeader)
  2382  		dataOffset := 0
  2383  
  2384  		if protocol == internetProtocolTCP {
  2385  			if len(packet) < 53 {
  2386  				metrics.rejectedPacket(direction, packetRejectTCPProtocolLength)
  2387  				return false
  2388  			}
  2389  			dataOffset = 40 + 4*int(packet[52]>>4)
  2390  			if len(packet) < dataOffset {
  2391  				metrics.rejectedPacket(direction, packetRejectTCPProtocolLength)
  2392  				return false
  2393  			}
  2394  		} else if protocol == internetProtocolUDP {
  2395  			dataOffset = 48
  2396  			if len(packet) < dataOffset {
  2397  				metrics.rejectedPacket(direction, packetRejectUDPProtocolLength)
  2398  				return false
  2399  			}
  2400  		} else {
  2401  			metrics.rejectedPacket(direction, packetRejectProtocol)
  2402  			return false
  2403  		}
  2404  
  2405  		applicationData = packet[dataOffset:]
  2406  
  2407  		// Slices reference packet bytes to be rewritten.
  2408  
  2409  		sourceIPAddress = packet[8:24]
  2410  		destinationIPAddress = packet[24:40]
  2411  
  2412  		// Port numbers have the same offset in TCP and UDP.
  2413  
  2414  		sourcePort = binary.BigEndian.Uint16(packet[40:42])
  2415  		destinationPort = binary.BigEndian.Uint16(packet[42:44])
  2416  
  2417  		if protocol == internetProtocolTCP {
  2418  			TCPChecksum = packet[56:58]
  2419  		} else { // UDP
  2420  			UDPChecksum = packet[46:48]
  2421  		}
  2422  	}
  2423  
  2424  	// Apply rules
  2425  	//
  2426  	// Most of this logic is only applied on the server, as only
  2427  	// the server knows the traffic rules configuration, and is
  2428  	// tracking flows.
  2429  
  2430  	isServer := (direction == packetDirectionServerUpstream ||
  2431  		direction == packetDirectionServerDownstream)
  2432  
  2433  	// Check if the packet qualifies for transparent DNS rewriting
  2434  	//
  2435  	// - Both TCP and UDP DNS packets may qualify
  2436  	// - Unless configured, transparent DNS flows are not tracked,
  2437  	//   as most DNS resolutions are very-short lived exchanges
  2438  	// - The traffic rules checks are bypassed, since transparent
  2439  	//   DNS is essential
  2440  
  2441  	doTransparentDNS := false
  2442  
  2443  	if isServer {
  2444  		if direction == packetDirectionServerUpstream {
  2445  
  2446  			// DNS packets destinated for the transparent DNS target addresses
  2447  			// will be rewritten to go to one of the server's resolvers.
  2448  
  2449  			if destinationPort == portNumberDNS {
  2450  				if version == 4 &&
  2451  					destinationIPAddress.Equal(transparentDNSResolverIPv4Address) {
  2452  
  2453  					numResolvers := len(session.DNSResolverIPv4Addresses)
  2454  					if numResolvers > 0 {
  2455  						doTransparentDNS = true
  2456  					} else {
  2457  						metrics.rejectedPacket(direction, packetRejectNoDNSResolvers)
  2458  						return false
  2459  					}
  2460  
  2461  				} else if version == 6 &&
  2462  					destinationIPAddress.Equal(transparentDNSResolverIPv6Address) {
  2463  
  2464  					numResolvers := len(session.DNSResolverIPv6Addresses)
  2465  					if numResolvers > 0 {
  2466  						doTransparentDNS = true
  2467  					} else {
  2468  						metrics.rejectedPacket(direction, packetRejectNoDNSResolvers)
  2469  						return false
  2470  					}
  2471  				}
  2472  
  2473  				// Limitation: checkAllowedDomainFunc is applied only to DNS queries in
  2474  				// UDP; currently DNS-over-TCP will bypass the domain block list check.
  2475  
  2476  				if doTransparentDNS && protocol == internetProtocolUDP {
  2477  
  2478  					domain, err := common.ParseDNSQuestion(applicationData)
  2479  					if err != nil {
  2480  						metrics.rejectedPacket(direction, packetRejectInvalidDNSMessage)
  2481  						return false
  2482  					}
  2483  					if domain != "" {
  2484  						checkAllowedDomainFunc := session.getCheckAllowedDomainFunc()
  2485  						if !checkAllowedDomainFunc(domain) {
  2486  							metrics.rejectedPacket(direction, packetRejectDisallowedDomain)
  2487  							return false
  2488  						}
  2489  					}
  2490  				}
  2491  			}
  2492  
  2493  		} else { // packetDirectionServerDownstream
  2494  
  2495  			// DNS packets with a source address of any of the server's
  2496  			// resolvers will be rewritten back to the transparent DNS target
  2497  			// address.
  2498  
  2499  			// Limitation: responses to client DNS packets _originally
  2500  			// destined_ for a resolver in GetDNSResolverIPv4Addresses will
  2501  			// be lost. This would happen if some process on the client
  2502  			// ignores the system set DNS values; and forces use of the same
  2503  			// resolvers as the server.
  2504  
  2505  			if sourcePort == portNumberDNS {
  2506  				if version == 4 {
  2507  					for _, IPAddress := range session.DNSResolverIPv4Addresses {
  2508  						if sourceIPAddress.Equal(IPAddress) {
  2509  							doTransparentDNS = true
  2510  							break
  2511  						}
  2512  					}
  2513  				} else if version == 6 {
  2514  					for _, IPAddress := range session.DNSResolverIPv6Addresses {
  2515  						if sourceIPAddress.Equal(IPAddress) {
  2516  							doTransparentDNS = true
  2517  							break
  2518  						}
  2519  					}
  2520  				}
  2521  			}
  2522  		}
  2523  	}
  2524  
  2525  	// Apply rewrites before determining flow ID to ensure that corresponding up-
  2526  	// and downstream flows yield the same flow ID.
  2527  
  2528  	var rewriteSourceIPAddress, rewriteDestinationIPAddress net.IP
  2529  
  2530  	if direction == packetDirectionServerUpstream {
  2531  
  2532  		// Store original source IP address to be replaced in
  2533  		// downstream rewriting.
  2534  
  2535  		if version == 4 {
  2536  			session.setOriginalIPv4AddressIfNotSet(sourceIPAddress)
  2537  			rewriteSourceIPAddress = session.assignedIPv4Address
  2538  		} else { // version == 6
  2539  			session.setOriginalIPv6AddressIfNotSet(sourceIPAddress)
  2540  			rewriteSourceIPAddress = session.assignedIPv6Address
  2541  		}
  2542  
  2543  		// Rewrite DNS packets destinated for the transparent DNS target addresses
  2544  		// to go to one of the server's resolvers. This random selection uses
  2545  		// math/rand to minimize overhead.
  2546  		//
  2547  		// Limitation: TCP packets are always assigned to the same resolver, as
  2548  		// currently there is no method for tracking the assigned resolver per TCP
  2549  		// flow.
  2550  
  2551  		if doTransparentDNS {
  2552  			if version == 4 {
  2553  
  2554  				index := session.TCPDNSResolverIPv4Index
  2555  				if protocol == internetProtocolUDP {
  2556  					index = rand.Intn(len(session.DNSResolverIPv4Addresses))
  2557  				}
  2558  				rewriteDestinationIPAddress = session.DNSResolverIPv4Addresses[index]
  2559  
  2560  			} else { // version == 6
  2561  
  2562  				index := session.TCPDNSResolverIPv6Index
  2563  				if protocol == internetProtocolUDP {
  2564  					index = rand.Intn(len(session.DNSResolverIPv6Addresses))
  2565  				}
  2566  				rewriteDestinationIPAddress = session.DNSResolverIPv6Addresses[index]
  2567  			}
  2568  		}
  2569  
  2570  	} else if direction == packetDirectionServerDownstream {
  2571  
  2572  		// Destination address will be original source address.
  2573  
  2574  		if version == 4 {
  2575  			rewriteDestinationIPAddress = session.getOriginalIPv4Address()
  2576  		} else { // version == 6
  2577  			rewriteDestinationIPAddress = session.getOriginalIPv6Address()
  2578  		}
  2579  
  2580  		if rewriteDestinationIPAddress == nil {
  2581  			metrics.rejectedPacket(direction, packetRejectNoOriginalAddress)
  2582  			return false
  2583  		}
  2584  
  2585  		// Rewrite source address of packets from servers' resolvers
  2586  		// to transparent DNS target address.
  2587  
  2588  		if doTransparentDNS {
  2589  
  2590  			if version == 4 {
  2591  				rewriteSourceIPAddress = transparentDNSResolverIPv4Address
  2592  			} else { // version == 6
  2593  				rewriteSourceIPAddress = transparentDNSResolverIPv6Address
  2594  			}
  2595  		}
  2596  	}
  2597  
  2598  	// Check if flow is tracked before checking traffic permission
  2599  
  2600  	doFlowTracking := isServer && (!doTransparentDNS || session.enableDNSFlowTracking)
  2601  
  2602  	// TODO: verify this struct is stack allocated
  2603  	var ID flowID
  2604  
  2605  	isTrackingFlow := false
  2606  
  2607  	if doFlowTracking {
  2608  
  2609  		if direction == packetDirectionServerUpstream {
  2610  
  2611  			// Reflect rewrites in the upstream case and don't reflect rewrites in the
  2612  			// following downstream case: all flow IDs are in the upstream space, with
  2613  			// the assigned private IP for the client and, in the case of DNS, the
  2614  			// actual resolver IP.
  2615  
  2616  			srcIP := sourceIPAddress
  2617  			if rewriteSourceIPAddress != nil {
  2618  				srcIP = rewriteSourceIPAddress
  2619  			}
  2620  
  2621  			destIP := destinationIPAddress
  2622  			if rewriteDestinationIPAddress != nil {
  2623  				destIP = rewriteDestinationIPAddress
  2624  			}
  2625  
  2626  			ID.set(srcIP, sourcePort, destIP, destinationPort, protocol)
  2627  
  2628  		} else if direction == packetDirectionServerDownstream {
  2629  
  2630  			ID.set(
  2631  				destinationIPAddress,
  2632  				destinationPort,
  2633  				sourceIPAddress,
  2634  				sourcePort,
  2635  				protocol)
  2636  		}
  2637  
  2638  		isTrackingFlow = session.isTrackingFlow(ID)
  2639  	}
  2640  
  2641  	// Check packet source/destination is permitted; except for:
  2642  	// - existing flows, which have already been checked
  2643  	// - transparent DNS, which is always allowed
  2644  
  2645  	if !doTransparentDNS && !isTrackingFlow {
  2646  
  2647  		// Enforce traffic rules (allowed TCP/UDP ports).
  2648  
  2649  		checkPort := 0
  2650  		if direction == packetDirectionServerUpstream ||
  2651  			direction == packetDirectionClientUpstream {
  2652  
  2653  			checkPort = int(destinationPort)
  2654  
  2655  		} else if direction == packetDirectionServerDownstream ||
  2656  			direction == packetDirectionClientDownstream {
  2657  
  2658  			checkPort = int(sourcePort)
  2659  		}
  2660  
  2661  		if protocol == internetProtocolTCP {
  2662  
  2663  			invalidPort := (checkPort == 0)
  2664  
  2665  			if !invalidPort && isServer {
  2666  				checkAllowedTCPPortFunc := session.getCheckAllowedTCPPortFunc()
  2667  				if checkAllowedTCPPortFunc == nil ||
  2668  					!checkAllowedTCPPortFunc(net.IP(ID.upstreamIPAddress[:]), checkPort) {
  2669  					invalidPort = true
  2670  				}
  2671  			}
  2672  
  2673  			if invalidPort {
  2674  				metrics.rejectedPacket(direction, packetRejectTCPPort)
  2675  				return false
  2676  			}
  2677  
  2678  		} else if protocol == internetProtocolUDP {
  2679  
  2680  			invalidPort := (checkPort == 0)
  2681  
  2682  			if !invalidPort && isServer {
  2683  				checkAllowedUDPPortFunc := session.getCheckAllowedUDPPortFunc()
  2684  				if checkAllowedUDPPortFunc == nil ||
  2685  					!checkAllowedUDPPortFunc(net.IP(ID.upstreamIPAddress[:]), checkPort) {
  2686  					invalidPort = true
  2687  				}
  2688  			}
  2689  
  2690  			if invalidPort {
  2691  				metrics.rejectedPacket(direction, packetRejectUDPPort)
  2692  				return false
  2693  			}
  2694  		}
  2695  
  2696  		// Enforce no localhost, multicast or broadcast packets; and no
  2697  		// client-to-client packets.
  2698  		//
  2699  		// TODO: a client-side check could check that destination IP
  2700  		// is strictly a tun device IP address.
  2701  
  2702  		if !destinationIPAddress.IsGlobalUnicast() ||
  2703  
  2704  			(direction == packetDirectionServerUpstream &&
  2705  				!session.allowBogons &&
  2706  				common.IsBogon(destinationIPAddress)) ||
  2707  
  2708  			// Client-to-client packets are disallowed even when other bogons are
  2709  			// allowed.
  2710  			(direction == packetDirectionServerUpstream &&
  2711  				((version == 4 &&
  2712  					!destinationIPAddress.Equal(transparentDNSResolverIPv4Address) &&
  2713  					privateSubnetIPv4.Contains(destinationIPAddress)) ||
  2714  					(version == 6 &&
  2715  						!destinationIPAddress.Equal(transparentDNSResolverIPv6Address) &&
  2716  						privateSubnetIPv6.Contains(destinationIPAddress)))) {
  2717  
  2718  			metrics.rejectedPacket(direction, packetRejectDestinationAddress)
  2719  			return false
  2720  		}
  2721  	}
  2722  
  2723  	// Apply packet rewrites. IP (v4 only) and TCP/UDP all have packet
  2724  	// checksums which are updated to relect the rewritten headers.
  2725  
  2726  	var checksumAccumulator int32
  2727  
  2728  	if rewriteSourceIPAddress != nil {
  2729  		checksumAccumulate(sourceIPAddress, false, &checksumAccumulator)
  2730  		copy(sourceIPAddress, rewriteSourceIPAddress)
  2731  		checksumAccumulate(sourceIPAddress, true, &checksumAccumulator)
  2732  	}
  2733  
  2734  	if rewriteDestinationIPAddress != nil {
  2735  		checksumAccumulate(destinationIPAddress, false, &checksumAccumulator)
  2736  		copy(destinationIPAddress, rewriteDestinationIPAddress)
  2737  		checksumAccumulate(destinationIPAddress, true, &checksumAccumulator)
  2738  	}
  2739  
  2740  	if rewriteSourceIPAddress != nil || rewriteDestinationIPAddress != nil {
  2741  
  2742  		// IPv6 doesn't have an IP header checksum.
  2743  		if version == 4 {
  2744  			checksumAdjust(IPChecksum, checksumAccumulator)
  2745  		}
  2746  
  2747  		if protocol == internetProtocolTCP {
  2748  			checksumAdjust(TCPChecksum, checksumAccumulator)
  2749  		} else { // UDP
  2750  			checksumAdjust(UDPChecksum, checksumAccumulator)
  2751  		}
  2752  	}
  2753  
  2754  	// Start/update flow tracking, only once past all possible packet rejects
  2755  
  2756  	if doFlowTracking {
  2757  		if !isTrackingFlow {
  2758  			session.startTrackingFlow(ID, direction, applicationData, doTransparentDNS)
  2759  		} else {
  2760  			session.updateFlow(ID, direction, applicationData)
  2761  		}
  2762  	}
  2763  
  2764  	metrics.relayedPacket(direction, int(version), protocol, len(packet), len(applicationData))
  2765  
  2766  	return true
  2767  }
  2768  
  2769  // Checksum code based on https://github.com/OpenVPN/openvpn:
  2770  /*
  2771  OpenVPN (TM) -- An Open Source VPN daemon
  2772  
  2773  Copyright (C) 2002-2017 OpenVPN Technologies, Inc. <sales@openvpn.net>
  2774  
  2775  OpenVPN license:
  2776  ----------------
  2777  
  2778  OpenVPN is distributed under the GPL license version 2 (see COPYRIGHT.GPL).
  2779  */
  2780  
  2781  func checksumAccumulate(data []byte, newData bool, accumulator *int32) {
  2782  
  2783  	// Based on ADD_CHECKSUM_32 and SUB_CHECKSUM_32 macros from OpenVPN:
  2784  	// https://github.com/OpenVPN/openvpn/blob/58716979640b5d8850b39820f91da616964398cc/src/openvpn/proto.h#L177
  2785  
  2786  	// Assumes length of data is factor of 4.
  2787  
  2788  	for i := 0; i < len(data); i += 4 {
  2789  		word := uint32(data[i+0])<<24 | uint32(data[i+1])<<16 | uint32(data[i+2])<<8 | uint32(data[i+3])
  2790  		if newData {
  2791  			*accumulator -= int32(word & 0xFFFF)
  2792  			*accumulator -= int32(word >> 16)
  2793  		} else {
  2794  			*accumulator += int32(word & 0xFFFF)
  2795  			*accumulator += int32(word >> 16)
  2796  		}
  2797  	}
  2798  }
  2799  
  2800  func checksumAdjust(checksumData []byte, accumulator int32) {
  2801  
  2802  	// Based on ADJUST_CHECKSUM macro from OpenVPN:
  2803  	// https://github.com/OpenVPN/openvpn/blob/58716979640b5d8850b39820f91da616964398cc/src/openvpn/proto.h#L177
  2804  
  2805  	// Assumes checksumData is 2 byte slice.
  2806  
  2807  	checksum := uint16(checksumData[0])<<8 | uint16(checksumData[1])
  2808  
  2809  	accumulator += int32(checksum)
  2810  	if accumulator < 0 {
  2811  		accumulator = -accumulator
  2812  		accumulator = (accumulator >> 16) + (accumulator & 0xFFFF)
  2813  		accumulator += accumulator >> 16
  2814  		checksum = uint16(^accumulator)
  2815  	} else {
  2816  		accumulator = (accumulator >> 16) + (accumulator & 0xFFFF)
  2817  		accumulator += accumulator >> 16
  2818  		checksum = uint16(accumulator)
  2819  	}
  2820  
  2821  	checksumData[0] = byte(checksum >> 8)
  2822  	checksumData[1] = byte(checksum & 0xFF)
  2823  }
  2824  
  2825  /*
  2826  
  2827  packet debugging snippet:
  2828  
  2829  	import (
  2830          "github.com/google/gopacket"
  2831          "github.com/google/gopacket/layers"
  2832  	)
  2833  
  2834  
  2835  	func tracePacket(where string, packet []byte) {
  2836  		var p gopacket.Packet
  2837  		if len(packet) > 0 && packet[0]>>4 == 4 {
  2838  			p = gopacket.NewPacket(packet, layers.LayerTypeIPv4, gopacket.Default)
  2839  		} else {
  2840  			p = gopacket.NewPacket(packet, layers.LayerTypeIPv6, gopacket.Default)
  2841  		}
  2842  		fmt.Printf("[%s packet]:\n%s\n\n", where, p)
  2843  	}
  2844  */
  2845  
  2846  // Device manages a tun device. It handles packet I/O using static,
  2847  // preallocated buffers to avoid GC churn.
  2848  type Device struct {
  2849  	name           string
  2850  	writeMutex     sync.Mutex
  2851  	deviceIO       io.ReadWriteCloser
  2852  	inboundBuffer  []byte
  2853  	outboundBuffer []byte
  2854  }
  2855  
  2856  // NewServerDevice creates and configures a new server tun device.
  2857  // Since the server uses fixed address spaces, only one server
  2858  // device may exist per host.
  2859  func NewServerDevice(config *ServerConfig) (*Device, error) {
  2860  
  2861  	file, deviceName, err := OpenTunDevice("")
  2862  	if err != nil {
  2863  		return nil, errors.Trace(err)
  2864  	}
  2865  
  2866  	err = configureServerInterface(config, deviceName)
  2867  	if err != nil {
  2868  		_ = file.Close()
  2869  		return nil, errors.Trace(err)
  2870  	}
  2871  
  2872  	return newDevice(
  2873  		deviceName,
  2874  		file,
  2875  		getMTU(config.MTU)), nil
  2876  }
  2877  
  2878  // NewClientDevice creates and configures a new client tun device.
  2879  // Multiple client tun devices may exist per host.
  2880  func NewClientDevice(config *ClientConfig) (*Device, error) {
  2881  
  2882  	file, deviceName, err := OpenTunDevice("")
  2883  	if err != nil {
  2884  		return nil, errors.Trace(err)
  2885  	}
  2886  
  2887  	err = configureClientInterface(
  2888  		config, deviceName)
  2889  	if err != nil {
  2890  		_ = file.Close()
  2891  		return nil, errors.Trace(err)
  2892  	}
  2893  
  2894  	return newDevice(
  2895  		deviceName,
  2896  		file,
  2897  		getMTU(config.MTU)), nil
  2898  }
  2899  
  2900  func newDevice(
  2901  	name string,
  2902  	deviceIO io.ReadWriteCloser,
  2903  	MTU int) *Device {
  2904  
  2905  	return &Device{
  2906  		name:           name,
  2907  		deviceIO:       deviceIO,
  2908  		inboundBuffer:  makeDeviceInboundBuffer(MTU),
  2909  		outboundBuffer: makeDeviceOutboundBuffer(MTU),
  2910  	}
  2911  }
  2912  
  2913  // NewClientDeviceFromFD wraps an existing tun device.
  2914  func NewClientDeviceFromFD(config *ClientConfig) (*Device, error) {
  2915  
  2916  	file, err := fileFromFD(config.TunFileDescriptor, "")
  2917  	if err != nil {
  2918  		return nil, errors.Trace(err)
  2919  	}
  2920  
  2921  	MTU := getMTU(config.MTU)
  2922  
  2923  	return &Device{
  2924  		name:           "",
  2925  		deviceIO:       file,
  2926  		inboundBuffer:  makeDeviceInboundBuffer(MTU),
  2927  		outboundBuffer: makeDeviceOutboundBuffer(MTU),
  2928  	}, nil
  2929  }
  2930  
  2931  // Name returns the interface name for a created tun device,
  2932  // or returns "" for a device created by NewClientDeviceFromFD.
  2933  // The interface name may be used for additional network and
  2934  // routing configuration.
  2935  func (device *Device) Name() string {
  2936  	return device.name
  2937  }
  2938  
  2939  // ReadPacket reads one full packet from the tun device. The
  2940  // return value is a slice of a static, reused buffer, so the
  2941  // value is only valid until the next ReadPacket call.
  2942  // Concurrent calls to ReadPacket are _not_ supported.
  2943  func (device *Device) ReadPacket() ([]byte, error) {
  2944  
  2945  	// readTunPacket performs the platform dependent
  2946  	// packet read operation.
  2947  	offset, size, err := device.readTunPacket()
  2948  	if err != nil {
  2949  		return nil, errors.Trace(err)
  2950  	}
  2951  
  2952  	return device.inboundBuffer[offset : offset+size], nil
  2953  }
  2954  
  2955  // WritePacket writes one full packet to the tun device.
  2956  // Concurrent calls to WritePacket are supported.
  2957  func (device *Device) WritePacket(packet []byte) error {
  2958  
  2959  	// This mutex ensures that only one concurrent goroutine
  2960  	// can use outboundBuffer when writing.
  2961  	device.writeMutex.Lock()
  2962  	defer device.writeMutex.Unlock()
  2963  
  2964  	// writeTunPacket performs the platform dependent
  2965  	// packet write operation.
  2966  	err := device.writeTunPacket(packet)
  2967  	if err != nil {
  2968  		return errors.Trace(err)
  2969  	}
  2970  
  2971  	return nil
  2972  }
  2973  
  2974  // Close interrupts any blocking Read/Write calls and
  2975  // tears down the tun device.
  2976  func (device *Device) Close() error {
  2977  	return device.deviceIO.Close()
  2978  }
  2979  
  2980  // Channel manages packet transport over a communications channel.
  2981  // Any io.ReadWriteCloser can provide transport. In psiphond, the
  2982  // io.ReadWriteCloser will be an SSH channel. Channel I/O frames
  2983  // packets with a length header and uses static, preallocated
  2984  // buffers to avoid GC churn.
  2985  type Channel struct {
  2986  	transport      io.ReadWriteCloser
  2987  	inboundBuffer  []byte
  2988  	outboundBuffer []byte
  2989  }
  2990  
  2991  // IP packets cannot be larger that 64K, so a 16-bit length
  2992  // header is sufficient.
  2993  const (
  2994  	channelHeaderSize = 2
  2995  )
  2996  
  2997  // NewChannel initializes a new Channel.
  2998  func NewChannel(transport io.ReadWriteCloser, MTU int) *Channel {
  2999  	return &Channel{
  3000  		transport:      transport,
  3001  		inboundBuffer:  make([]byte, channelHeaderSize+MTU),
  3002  		outboundBuffer: make([]byte, channelHeaderSize+MTU),
  3003  	}
  3004  }
  3005  
  3006  // ReadPacket reads one full packet from the channel. The
  3007  // return value is a slice of a static, reused buffer, so the
  3008  // value is only valid until the next ReadPacket call.
  3009  // Concurrent calls to ReadPacket are not supported.
  3010  func (channel *Channel) ReadPacket() ([]byte, error) {
  3011  
  3012  	header := channel.inboundBuffer[0:channelHeaderSize]
  3013  	_, err := io.ReadFull(channel.transport, header)
  3014  	if err != nil {
  3015  		return nil, errors.Trace(err)
  3016  	}
  3017  
  3018  	size := int(binary.BigEndian.Uint16(header))
  3019  	if size > len(channel.inboundBuffer[channelHeaderSize:]) {
  3020  		return nil, errors.Tracef("packet size exceeds MTU: %d", size)
  3021  	}
  3022  
  3023  	packet := channel.inboundBuffer[channelHeaderSize : channelHeaderSize+size]
  3024  	_, err = io.ReadFull(channel.transport, packet)
  3025  	if err != nil {
  3026  		return nil, errors.Trace(err)
  3027  	}
  3028  
  3029  	return packet, nil
  3030  }
  3031  
  3032  // WritePacket writes one full packet to the channel.
  3033  // Concurrent calls to WritePacket are not supported.
  3034  func (channel *Channel) WritePacket(packet []byte) error {
  3035  
  3036  	// Flow control assumed to be provided by the transport. In the case
  3037  	// of SSH, the channel window size will determine whether the packet
  3038  	// data is transmitted immediately or whether the transport.Write will
  3039  	// block. When the channel window is full and transport.Write blocks,
  3040  	// the sender's tun device will not be read (client case) or the send
  3041  	// queue will fill (server case) and packets will be dropped. In this
  3042  	// way, the channel window size will influence the TCP window size for
  3043  	// tunneled traffic.
  3044  
  3045  	// When the transport is an SSH channel, the overhead per packet message
  3046  	// includes:
  3047  	//
  3048  	// - SSH_MSG_CHANNEL_DATA: 5 bytes (https://tools.ietf.org/html/rfc4254#section-5.2)
  3049  	// - SSH packet: ~28 bytes (https://tools.ietf.org/html/rfc4253#section-5.3), with MAC
  3050  	// - TCP/IP transport for SSH: 40 bytes for IPv4
  3051  
  3052  	// Assumes MTU <= 64K and len(packet) <= MTU
  3053  
  3054  	size := len(packet)
  3055  	binary.BigEndian.PutUint16(channel.outboundBuffer, uint16(size))
  3056  	copy(channel.outboundBuffer[channelHeaderSize:], packet)
  3057  	_, err := channel.transport.Write(channel.outboundBuffer[0 : channelHeaderSize+size])
  3058  	if err != nil {
  3059  		return errors.Trace(err)
  3060  	}
  3061  
  3062  	return nil
  3063  }
  3064  
  3065  // WriteFramedPackets writes a buffer of pre-framed packets to
  3066  // the channel.
  3067  // Concurrent calls to WriteFramedPackets are not supported.
  3068  func (channel *Channel) WriteFramedPackets(packetBuffer []byte) error {
  3069  	_, err := channel.transport.Write(packetBuffer)
  3070  	if err != nil {
  3071  		return errors.Trace(err)
  3072  	}
  3073  	return nil
  3074  }
  3075  
  3076  // Close interrupts any blocking Read/Write calls and
  3077  // closes the channel transport.
  3078  func (channel *Channel) Close() error {
  3079  	return channel.transport.Close()
  3080  }