github.com/psiphon-Labs/psiphon-tunnel-core@v2.0.28+incompatible/psiphon/tunnel.go (about)

     1  /*
     2   * Copyright (c) 2015, Psiphon Inc.
     3   * All rights reserved.
     4   *
     5   * This program is free software: you can redistribute it and/or modify
     6   * it under the terms of the GNU General Public License as published by
     7   * the Free Software Foundation, either version 3 of the License, or
     8   * (at your option) any later version.
     9   *
    10   * This program is distributed in the hope that it will be useful,
    11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
    12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13   * GNU General Public License for more details.
    14   *
    15   * You should have received a copy of the GNU General Public License
    16   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17   *
    18   */
    19  
    20  package psiphon
    21  
    22  import (
    23  	"bytes"
    24  	"context"
    25  	"crypto/rand"
    26  	"encoding/base64"
    27  	"encoding/json"
    28  	std_errors "errors"
    29  	"fmt"
    30  	"io"
    31  	"io/ioutil"
    32  	"net"
    33  	"net/http"
    34  	"sync"
    35  	"sync/atomic"
    36  	"time"
    37  
    38  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
    39  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/crypto/ssh"
    40  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
    41  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/obfuscator"
    42  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/parameters"
    43  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/prng"
    44  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/protocol"
    45  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/quic"
    46  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/refraction"
    47  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/tactics"
    48  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/transferstats"
    49  )
    50  
    51  // Tunneler specifies the interface required by components that use tunnels.
    52  type Tunneler interface {
    53  
    54  	// Dial creates a tunneled connection.
    55  	//
    56  	// When split tunnel mode is enabled, the connection may be untunneled,
    57  	// depending on GeoIP classification of the destination.
    58  	//
    59  	// downstreamConn is an optional parameter which specifies a connection to be
    60  	// explicitly closed when the Dialed connection is closed. For instance, this
    61  	// is used to close downstreamConn App<->LocalProxy connections when the related
    62  	// LocalProxy<->SshPortForward connections close.
    63  	Dial(remoteAddr string, downstreamConn net.Conn) (conn net.Conn, err error)
    64  
    65  	DirectDial(remoteAddr string) (conn net.Conn, err error)
    66  
    67  	SignalComponentFailure()
    68  }
    69  
    70  // TunnelOwner specifies the interface required by Tunnel to notify its
    71  // owner when it has failed. The owner may, as in the case of the Controller,
    72  // remove the tunnel from its list of active tunnels.
    73  type TunnelOwner interface {
    74  	SignalSeededNewSLOK()
    75  	SignalTunnelFailure(tunnel *Tunnel)
    76  }
    77  
    78  // Tunnel is a connection to a Psiphon server. An established
    79  // tunnel includes a network connection to the specified server
    80  // and an SSH session built on top of that transport.
    81  type Tunnel struct {
    82  	mutex                          *sync.Mutex
    83  	config                         *Config
    84  	isActivated                    bool
    85  	isDiscarded                    bool
    86  	isClosed                       bool
    87  	dialParams                     *DialParameters
    88  	livenessTestMetrics            *livenessTestMetrics
    89  	extraFailureAction             func()
    90  	serverContext                  *ServerContext
    91  	monitoringStartTime            time.Time
    92  	conn                           *common.BurstMonitoredConn
    93  	sshClient                      *ssh.Client
    94  	sshServerRequests              <-chan *ssh.Request
    95  	operateWaitGroup               *sync.WaitGroup
    96  	operateCtx                     context.Context
    97  	stopOperate                    context.CancelFunc
    98  	signalPortForwardFailure       chan struct{}
    99  	totalPortForwardFailures       int
   100  	adjustedEstablishStartTime     time.Time
   101  	establishDuration              time.Duration
   102  	establishedTime                time.Time
   103  	handledSSHKeepAliveFailure     int32
   104  	inFlightConnectedRequestSignal chan struct{}
   105  }
   106  
   107  // getCustomParameters helpers wrap the verbose function call chain required
   108  // to get a current snapshot of the parameters.Parameters customized with the
   109  // dial parameters associated with a tunnel.
   110  
   111  func (tunnel *Tunnel) getCustomParameters() parameters.ParametersAccessor {
   112  	return getCustomParameters(tunnel.config, tunnel.dialParams)
   113  }
   114  
   115  func getCustomParameters(
   116  	config *Config, dialParams *DialParameters) parameters.ParametersAccessor {
   117  	return config.GetParameters().GetCustom(dialParams.NetworkLatencyMultiplier)
   118  }
   119  
   120  // ConnectTunnel first makes a network transport connection to the
   121  // Psiphon server and then establishes an SSH client session on top of
   122  // that transport. The SSH server is authenticated using the public
   123  // key in the server entry.
   124  // Depending on the server's capabilities, the connection may use
   125  // plain SSH over TCP, obfuscated SSH over TCP, or obfuscated SSH over
   126  // HTTP (meek protocol).
   127  // When requiredProtocol is not blank, that protocol is used. Otherwise,
   128  // the a random supported protocol is used.
   129  //
   130  // Call Activate on a connected tunnel to complete its establishment
   131  // before using.
   132  //
   133  // Tunnel establishment is split into two phases: connection, and
   134  // activation. The Controller will run many ConnectTunnel calls
   135  // concurrently and then, to avoid unnecessary overhead from making
   136  // handshake requests and starting operateTunnel from tunnels which
   137  // may be discarded, call Activate on connected tunnels sequentially
   138  // as necessary.
   139  func ConnectTunnel(
   140  	ctx context.Context,
   141  	config *Config,
   142  	adjustedEstablishStartTime time.Time,
   143  	dialParams *DialParameters) (*Tunnel, error) {
   144  
   145  	// Build transport layers and establish SSH connection. Note that
   146  	// dialConn and monitoredConn are the same network connection.
   147  	dialResult, err := dialTunnel(ctx, config, dialParams)
   148  	if err != nil {
   149  		return nil, errors.Trace(err)
   150  	}
   151  
   152  	// The tunnel is now connected
   153  	return &Tunnel{
   154  		mutex:               new(sync.Mutex),
   155  		config:              config,
   156  		dialParams:          dialParams,
   157  		livenessTestMetrics: dialResult.livenessTestMetrics,
   158  		extraFailureAction:  dialResult.extraFailureAction,
   159  		monitoringStartTime: dialResult.monitoringStartTime,
   160  		conn:                dialResult.monitoredConn,
   161  		sshClient:           dialResult.sshClient,
   162  		sshServerRequests:   dialResult.sshRequests,
   163  		// A buffer allows at least one signal to be sent even when the receiver is
   164  		// not listening. Senders should not block.
   165  		signalPortForwardFailure:   make(chan struct{}, 1),
   166  		adjustedEstablishStartTime: adjustedEstablishStartTime,
   167  	}, nil
   168  }
   169  
   170  // Activate completes the tunnel establishment, performing the handshake
   171  // request and starting operateTunnel, the worker that monitors the tunnel
   172  // and handles periodic management.
   173  func (tunnel *Tunnel) Activate(
   174  	ctx context.Context, tunnelOwner TunnelOwner) (retErr error) {
   175  
   176  	// Ensure that, unless the base context is cancelled, any replayed dial
   177  	// parameters are cleared, no longer to be retried, if the tunnel fails to
   178  	// activate.
   179  	activationSucceeded := false
   180  	baseCtx := ctx
   181  	defer func() {
   182  		if !activationSucceeded && baseCtx.Err() != context.Canceled {
   183  			tunnel.dialParams.Failed(tunnel.config)
   184  			if tunnel.extraFailureAction != nil {
   185  				tunnel.extraFailureAction()
   186  			}
   187  			if retErr != nil {
   188  				_ = RecordFailedTunnelStat(
   189  					tunnel.config,
   190  					tunnel.dialParams,
   191  					tunnel.livenessTestMetrics,
   192  					-1,
   193  					-1,
   194  					retErr)
   195  			}
   196  		}
   197  	}()
   198  
   199  	// Create a new Psiphon API server context for this tunnel. This includes
   200  	// performing a handshake request. If the handshake fails, this activation
   201  	// fails.
   202  	var serverContext *ServerContext
   203  	if !tunnel.config.DisableApi {
   204  		NoticeInfo(
   205  			"starting server context for %s",
   206  			tunnel.dialParams.ServerEntry.GetDiagnosticID())
   207  
   208  		// Call NewServerContext in a goroutine, as it blocks on a network operation,
   209  		// the handshake request, and would block shutdown. If the shutdown signal is
   210  		// received, close the tunnel, which will interrupt the handshake request
   211  		// that may be blocking NewServerContext.
   212  		//
   213  		// Timeout after PsiphonApiServerTimeoutSeconds. NewServerContext may not
   214  		// return if the tunnel network connection is unstable during the handshake
   215  		// request. At this point, there is no operateTunnel monitor that will detect
   216  		// this condition with SSH keep alives.
   217  
   218  		timeout := tunnel.getCustomParameters().Duration(
   219  			parameters.PsiphonAPIRequestTimeout)
   220  
   221  		if timeout > 0 {
   222  			var cancelFunc context.CancelFunc
   223  			ctx, cancelFunc = context.WithTimeout(ctx, timeout)
   224  			defer cancelFunc()
   225  		}
   226  
   227  		type newServerContextResult struct {
   228  			serverContext *ServerContext
   229  			err           error
   230  		}
   231  
   232  		resultChannel := make(chan newServerContextResult)
   233  
   234  		go func() {
   235  			serverContext, err := NewServerContext(tunnel)
   236  			resultChannel <- newServerContextResult{
   237  				serverContext: serverContext,
   238  				err:           err,
   239  			}
   240  		}()
   241  
   242  		var result newServerContextResult
   243  
   244  		select {
   245  		case result = <-resultChannel:
   246  		case <-ctx.Done():
   247  			result.err = ctx.Err()
   248  			// Interrupt the goroutine
   249  			tunnel.Close(true)
   250  			<-resultChannel
   251  		}
   252  
   253  		if result.err != nil {
   254  			return errors.Trace(result.err)
   255  		}
   256  
   257  		serverContext = result.serverContext
   258  	}
   259  
   260  	// The activation succeeded.
   261  	activationSucceeded = true
   262  
   263  	tunnel.mutex.Lock()
   264  
   265  	// It may happen that the tunnel gets closed while Activate is running.
   266  	// In this case, abort here, to ensure that the operateTunnel goroutine
   267  	// will not be launched after Close is called.
   268  	if tunnel.isClosed {
   269  		return errors.TraceNew("tunnel is closed")
   270  	}
   271  
   272  	tunnel.isActivated = true
   273  	tunnel.serverContext = serverContext
   274  
   275  	// establishDuration is the elapsed time between the controller starting tunnel
   276  	// establishment and this tunnel being established. The reported value represents
   277  	// how long the user waited between starting the client and having a usable tunnel;
   278  	// or how long between the client detecting an unexpected tunnel disconnect and
   279  	// completing automatic reestablishment.
   280  	//
   281  	// This time period may include time spent unsuccessfully connecting to other
   282  	// servers. Time spent waiting for network connectivity is excluded.
   283  	tunnel.establishDuration = time.Since(tunnel.adjustedEstablishStartTime)
   284  	tunnel.establishedTime = time.Now()
   285  
   286  	// Use the Background context instead of the controller run context, as tunnels
   287  	// are terminated when the controller calls tunnel.Close.
   288  	tunnel.operateCtx, tunnel.stopOperate = context.WithCancel(context.Background())
   289  	tunnel.operateWaitGroup = new(sync.WaitGroup)
   290  
   291  	// Spawn the operateTunnel goroutine, which monitors the tunnel and handles periodic
   292  	// stats updates.
   293  	tunnel.operateWaitGroup.Add(1)
   294  	go tunnel.operateTunnel(tunnelOwner)
   295  
   296  	tunnel.mutex.Unlock()
   297  
   298  	return nil
   299  }
   300  
   301  // Close stops operating the tunnel and closes the underlying connection.
   302  // Supports multiple and/or concurrent calls to Close().
   303  // When isDiscarded is set, operateTunnel will not attempt to send final
   304  // status requests.
   305  func (tunnel *Tunnel) Close(isDiscarded bool) {
   306  
   307  	tunnel.mutex.Lock()
   308  	tunnel.isDiscarded = isDiscarded
   309  	isActivated := tunnel.isActivated
   310  	isClosed := tunnel.isClosed
   311  	tunnel.isClosed = true
   312  	tunnel.mutex.Unlock()
   313  
   314  	if !isClosed {
   315  
   316  		// Signal operateTunnel to stop before closing the tunnel -- this
   317  		// allows a final status request to be made in the case of an orderly
   318  		// shutdown.
   319  		// A timer is set, so if operateTunnel takes too long to stop, the
   320  		// tunnel is closed, which will interrupt any slow final status request.
   321  
   322  		if isActivated {
   323  			timeout := tunnel.getCustomParameters().Duration(
   324  				parameters.TunnelOperateShutdownTimeout)
   325  			afterFunc := time.AfterFunc(
   326  				timeout,
   327  				func() { tunnel.conn.Close() })
   328  			tunnel.stopOperate()
   329  			tunnel.operateWaitGroup.Wait()
   330  			afterFunc.Stop()
   331  		}
   332  
   333  		tunnel.sshClient.Close()
   334  		// tunnel.conn.Close() may get called multiple times, which is allowed.
   335  		tunnel.conn.Close()
   336  
   337  		err := tunnel.sshClient.Wait()
   338  		if err != nil {
   339  			NoticeWarning("close tunnel ssh error: %s", err)
   340  		}
   341  	}
   342  
   343  	// Log burst metrics now that the BurstMonitoredConn is closed.
   344  	// Metrics will be empty when burst monitoring is disabled.
   345  	if !isDiscarded && isActivated {
   346  		burstMetrics := tunnel.conn.GetMetrics(tunnel.monitoringStartTime)
   347  		if len(burstMetrics) > 0 {
   348  			NoticeBursts(
   349  				tunnel.dialParams.ServerEntry.GetDiagnosticID(),
   350  				burstMetrics)
   351  		}
   352  	}
   353  }
   354  
   355  // SetInFlightConnectedRequest checks if a connected request can begin and
   356  // sets the channel used to signal that the request is complete.
   357  //
   358  // The caller must not initiate a connected request when
   359  // SetInFlightConnectedRequest returns false. When SetInFlightConnectedRequest
   360  // returns true, the caller must call SetInFlightConnectedRequest(nil) when
   361  // the connected request completes.
   362  func (tunnel *Tunnel) SetInFlightConnectedRequest(requestSignal chan struct{}) bool {
   363  	tunnel.mutex.Lock()
   364  	defer tunnel.mutex.Unlock()
   365  
   366  	// If already closing, don't start a connected request: the
   367  	// TunnelOperateShutdownTimeout period may be nearly expired.
   368  	if tunnel.isClosed {
   369  		return false
   370  	}
   371  
   372  	if requestSignal == nil {
   373  		// Not already in-flight (not expected)
   374  		if tunnel.inFlightConnectedRequestSignal == nil {
   375  			return false
   376  		}
   377  	} else {
   378  		// Already in-flight (not expected)
   379  		if tunnel.inFlightConnectedRequestSignal != nil {
   380  			return false
   381  		}
   382  	}
   383  
   384  	tunnel.inFlightConnectedRequestSignal = requestSignal
   385  
   386  	return true
   387  }
   388  
   389  // AwaitInFlightConnectedRequest waits for the signal that any in-flight
   390  // connected request is complete.
   391  //
   392  // AwaitInFlightConnectedRequest may block until the connected request is
   393  // aborted by terminating the tunnel.
   394  func (tunnel *Tunnel) AwaitInFlightConnectedRequest() {
   395  	tunnel.mutex.Lock()
   396  	requestSignal := tunnel.inFlightConnectedRequestSignal
   397  	tunnel.mutex.Unlock()
   398  
   399  	if requestSignal != nil {
   400  		<-requestSignal
   401  	}
   402  }
   403  
   404  // IsActivated returns the tunnel's activated flag.
   405  func (tunnel *Tunnel) IsActivated() bool {
   406  	tunnel.mutex.Lock()
   407  	defer tunnel.mutex.Unlock()
   408  	return tunnel.isActivated
   409  }
   410  
   411  // IsDiscarded returns the tunnel's discarded flag.
   412  func (tunnel *Tunnel) IsDiscarded() bool {
   413  	tunnel.mutex.Lock()
   414  	defer tunnel.mutex.Unlock()
   415  	return tunnel.isDiscarded
   416  }
   417  
   418  // SendAPIRequest sends an API request as an SSH request through the tunnel.
   419  // This function blocks awaiting a response. Only one request may be in-flight
   420  // at once; a concurrent SendAPIRequest will block until an active request
   421  // receives its response (or the SSH connection is terminated).
   422  func (tunnel *Tunnel) SendAPIRequest(
   423  	name string, requestPayload []byte) ([]byte, error) {
   424  
   425  	ok, responsePayload, err := tunnel.sshClient.Conn.SendRequest(
   426  		name, true, requestPayload)
   427  
   428  	if err != nil {
   429  		return nil, errors.Trace(err)
   430  	}
   431  
   432  	if !ok {
   433  		return nil, errors.TraceNew("API request rejected")
   434  	}
   435  
   436  	return responsePayload, nil
   437  }
   438  
   439  // DialTCPChannel establishes a TCP port forward connection through the
   440  // tunnel.
   441  //
   442  // When split tunnel mode is enabled, and unless alwaysTunneled is set, the
   443  // server may reject the port forward and indicate that the client is to make
   444  // direct, untunneled connection. In this case, the bool return value is true
   445  // and net.Conn and error are nil.
   446  //
   447  // downstreamConn is an optional parameter which specifies a connection to be
   448  // explicitly closed when the dialed connection is closed.
   449  func (tunnel *Tunnel) DialTCPChannel(
   450  	remoteAddr string,
   451  	alwaysTunneled bool,
   452  	downstreamConn net.Conn) (net.Conn, bool, error) {
   453  
   454  	channelType := "direct-tcpip"
   455  	if alwaysTunneled && tunnel.config.IsSplitTunnelEnabled() {
   456  		// This channel type is only necessary in split tunnel mode.
   457  		channelType = protocol.TCP_PORT_FORWARD_NO_SPLIT_TUNNEL_TYPE
   458  	}
   459  
   460  	channel, err := tunnel.dialChannel(channelType, remoteAddr)
   461  	if err != nil {
   462  		if isSplitTunnelRejectReason(err) {
   463  			return nil, true, nil
   464  		}
   465  		return nil, false, errors.Trace(err)
   466  	}
   467  
   468  	netConn, ok := channel.(net.Conn)
   469  	if !ok {
   470  		return nil, false, errors.Tracef("unexpected channel type: %T", channel)
   471  	}
   472  
   473  	conn := &TunneledConn{
   474  		Conn:           netConn,
   475  		tunnel:         tunnel,
   476  		downstreamConn: downstreamConn}
   477  
   478  	return tunnel.wrapWithTransferStats(conn), false, nil
   479  }
   480  
   481  func (tunnel *Tunnel) DialPacketTunnelChannel() (net.Conn, error) {
   482  
   483  	channel, err := tunnel.dialChannel(protocol.PACKET_TUNNEL_CHANNEL_TYPE, "")
   484  	if err != nil {
   485  		return nil, errors.Trace(err)
   486  	}
   487  
   488  	sshChannel, ok := channel.(ssh.Channel)
   489  	if !ok {
   490  		return nil, errors.Tracef("unexpected channel type: %T", channel)
   491  	}
   492  
   493  	NoticeInfo("DialPacketTunnelChannel: established channel")
   494  
   495  	conn := newChannelConn(sshChannel)
   496  
   497  	// wrapWithTransferStats will track bytes transferred for the
   498  	// packet tunnel. It will count packet overhead (TCP/UDP/IP headers).
   499  	//
   500  	// Since the data in the channel is not HTTP or TLS, no domain bytes
   501  	// counting is expected.
   502  	//
   503  	// transferstats are also used to determine that there's been recent
   504  	// activity and skip periodic SSH keep alives; see Tunnel.operateTunnel.
   505  
   506  	return tunnel.wrapWithTransferStats(conn), nil
   507  }
   508  
   509  func (tunnel *Tunnel) dialChannel(channelType, remoteAddr string) (interface{}, error) {
   510  
   511  	if !tunnel.IsActivated() {
   512  		return nil, errors.TraceNew("tunnel is not activated")
   513  	}
   514  
   515  	// Note: there is no dial context since SSH port forward dials cannot
   516  	// be interrupted directly. Closing the tunnel will interrupt the dials.
   517  	// A timeout is set to unblock this function, but the goroutine may
   518  	// not exit until the tunnel is closed.
   519  
   520  	type channelDialResult struct {
   521  		channel interface{}
   522  		err     error
   523  	}
   524  
   525  	// Use a buffer of 1 as there are two senders and only one guaranteed receive.
   526  
   527  	results := make(chan *channelDialResult, 1)
   528  
   529  	afterFunc := time.AfterFunc(
   530  		tunnel.getCustomParameters().Duration(
   531  			parameters.TunnelPortForwardDialTimeout),
   532  		func() {
   533  			results <- &channelDialResult{
   534  				nil, errors.Tracef("channel dial timeout: %s", channelType)}
   535  		})
   536  	defer afterFunc.Stop()
   537  
   538  	go func() {
   539  		result := new(channelDialResult)
   540  		switch channelType {
   541  
   542  		case "direct-tcpip", protocol.TCP_PORT_FORWARD_NO_SPLIT_TUNNEL_TYPE:
   543  			// The protocol.TCP_PORT_FORWARD_NO_SPLIT_TUNNEL_TYPE is the same as
   544  			// "direct-tcpip", except split tunnel channel rejections are disallowed
   545  			// even when split tunnel mode is enabled.
   546  			result.channel, result.err =
   547  				tunnel.sshClient.Dial(channelType, remoteAddr)
   548  
   549  		default:
   550  			var sshRequests <-chan *ssh.Request
   551  			result.channel, sshRequests, result.err =
   552  				tunnel.sshClient.OpenChannel(channelType, nil)
   553  			if result.err == nil {
   554  				go ssh.DiscardRequests(sshRequests)
   555  			}
   556  		}
   557  		if result.err != nil {
   558  			result.err = errors.Trace(result.err)
   559  		}
   560  		results <- result
   561  	}()
   562  
   563  	result := <-results
   564  
   565  	if result.err != nil {
   566  		if !isSplitTunnelRejectReason(result.err) {
   567  			select {
   568  			case tunnel.signalPortForwardFailure <- struct{}{}:
   569  			default:
   570  			}
   571  		}
   572  		return nil, errors.Trace(result.err)
   573  	}
   574  
   575  	return result.channel, nil
   576  }
   577  
   578  func isSplitTunnelRejectReason(err error) bool {
   579  
   580  	var openChannelErr *ssh.OpenChannelError
   581  	if std_errors.As(err, &openChannelErr) {
   582  		return openChannelErr.Reason ==
   583  			ssh.RejectionReason(protocol.CHANNEL_REJECT_REASON_SPLIT_TUNNEL)
   584  	}
   585  
   586  	return false
   587  }
   588  
   589  func (tunnel *Tunnel) wrapWithTransferStats(conn net.Conn) net.Conn {
   590  
   591  	// Tunnel does not have a serverContext when DisableApi is set. We still use
   592  	// transferstats.Conn to count bytes transferred for monitoring tunnel
   593  	// quality.
   594  	var regexps *transferstats.Regexps
   595  	if tunnel.serverContext != nil {
   596  		regexps = tunnel.serverContext.StatsRegexps()
   597  	}
   598  
   599  	return transferstats.NewConn(
   600  		conn, tunnel.dialParams.ServerEntry.IpAddress, regexps)
   601  }
   602  
   603  // SignalComponentFailure notifies the tunnel that an associated component has failed.
   604  // This will terminate the tunnel.
   605  func (tunnel *Tunnel) SignalComponentFailure() {
   606  	NoticeWarning("tunnel received component failure signal")
   607  	tunnel.Close(false)
   608  }
   609  
   610  // TunneledConn implements net.Conn and wraps a port forward connection.
   611  // It is used to hook into Read and Write to observe I/O errors and
   612  // report these errors back to the tunnel monitor as port forward failures.
   613  // TunneledConn optionally tracks a peer connection to be explicitly closed
   614  // when the TunneledConn is closed.
   615  type TunneledConn struct {
   616  	net.Conn
   617  	tunnel         *Tunnel
   618  	downstreamConn net.Conn
   619  }
   620  
   621  func (conn *TunneledConn) Read(buffer []byte) (n int, err error) {
   622  	n, err = conn.Conn.Read(buffer)
   623  	if err != nil && err != io.EOF {
   624  		// Report new failure. Won't block; assumes the receiver
   625  		// has a sufficient buffer for the threshold number of reports.
   626  		// TODO: conditional on type of error or error message?
   627  		select {
   628  		case conn.tunnel.signalPortForwardFailure <- struct{}{}:
   629  		default:
   630  		}
   631  	}
   632  	return
   633  }
   634  
   635  func (conn *TunneledConn) Write(buffer []byte) (n int, err error) {
   636  	n, err = conn.Conn.Write(buffer)
   637  	if err != nil && err != io.EOF {
   638  		// Same as TunneledConn.Read()
   639  		select {
   640  		case conn.tunnel.signalPortForwardFailure <- struct{}{}:
   641  		default:
   642  		}
   643  	}
   644  	return
   645  }
   646  
   647  func (conn *TunneledConn) Close() error {
   648  	if conn.downstreamConn != nil {
   649  		conn.downstreamConn.Close()
   650  	}
   651  	return conn.Conn.Close()
   652  }
   653  
   654  type dialResult struct {
   655  	dialConn            net.Conn
   656  	monitoringStartTime time.Time
   657  	monitoredConn       *common.BurstMonitoredConn
   658  	sshClient           *ssh.Client
   659  	sshRequests         <-chan *ssh.Request
   660  	livenessTestMetrics *livenessTestMetrics
   661  	extraFailureAction  func()
   662  }
   663  
   664  // dialTunnel is a helper that builds the transport layers and establishes the
   665  // SSH connection. When additional dial configuration is used, dial metrics
   666  // are recorded and returned.
   667  func dialTunnel(
   668  	ctx context.Context,
   669  	config *Config,
   670  	dialParams *DialParameters) (_ *dialResult, retErr error) {
   671  
   672  	// Return immediately when overall context is canceled or timed-out. This
   673  	// avoids notice noise.
   674  	err := ctx.Err()
   675  	if err != nil {
   676  		return nil, errors.Trace(err)
   677  	}
   678  
   679  	p := getCustomParameters(config, dialParams)
   680  	timeout := p.Duration(parameters.TunnelConnectTimeout)
   681  	rateLimits := p.RateLimits(parameters.TunnelRateLimits)
   682  	obfuscatedSSHMinPadding := p.Int(parameters.ObfuscatedSSHMinPadding)
   683  	obfuscatedSSHMaxPadding := p.Int(parameters.ObfuscatedSSHMaxPadding)
   684  	livenessTestMinUpstreamBytes := p.Int(parameters.LivenessTestMinUpstreamBytes)
   685  	livenessTestMaxUpstreamBytes := p.Int(parameters.LivenessTestMaxUpstreamBytes)
   686  	livenessTestMinDownstreamBytes := p.Int(parameters.LivenessTestMinDownstreamBytes)
   687  	livenessTestMaxDownstreamBytes := p.Int(parameters.LivenessTestMaxDownstreamBytes)
   688  	burstUpstreamTargetBytes := int64(p.Int(parameters.ClientBurstUpstreamTargetBytes))
   689  	burstUpstreamDeadline := p.Duration(parameters.ClientBurstUpstreamDeadline)
   690  	burstDownstreamTargetBytes := int64(p.Int(parameters.ClientBurstDownstreamTargetBytes))
   691  	burstDownstreamDeadline := p.Duration(parameters.ClientBurstDownstreamDeadline)
   692  	p.Close()
   693  
   694  	// Ensure that, unless the base context is cancelled, any replayed dial
   695  	// parameters are cleared, no longer to be retried, if the tunnel fails to
   696  	// connect.
   697  	//
   698  	// Limitation: dials that fail to connect due to the server being in a
   699  	// load-limiting state are not distinguished and excepted from this
   700  	// logic.
   701  	dialSucceeded := false
   702  	baseCtx := ctx
   703  	var failedTunnelLivenessTestMetrics *livenessTestMetrics
   704  	var extraFailureAction func()
   705  	defer func() {
   706  		if !dialSucceeded && baseCtx.Err() != context.Canceled {
   707  			dialParams.Failed(config)
   708  			if extraFailureAction != nil {
   709  				extraFailureAction()
   710  			}
   711  			if retErr != nil {
   712  				_ = RecordFailedTunnelStat(
   713  					config,
   714  					dialParams,
   715  					failedTunnelLivenessTestMetrics,
   716  					-1,
   717  					-1,
   718  					retErr)
   719  			}
   720  		}
   721  	}()
   722  
   723  	var cancelFunc context.CancelFunc
   724  	ctx, cancelFunc = context.WithTimeout(ctx, timeout)
   725  	defer cancelFunc()
   726  
   727  	// DialDuration is the elapsed time for both successful and failed tunnel
   728  	// dials. For successful tunnels, it includes any the network protocol
   729  	// handshake(s), obfuscation protocol handshake(s), SSH handshake, and
   730  	// liveness test, when performed.
   731  	//
   732  	// Note: ensure DialDuration is set before calling any function which logs
   733  	// dial_duration.
   734  
   735  	startDialTime := time.Now()
   736  	defer func() {
   737  		dialParams.DialDuration = time.Since(startDialTime)
   738  	}()
   739  
   740  	// Note: dialParams.MeekResolvedIPAddress isn't set until the dial begins,
   741  	// so it will always be blank in NoticeConnectingServer.
   742  
   743  	NoticeConnectingServer(dialParams)
   744  
   745  	// Create the base transport: meek or direct connection
   746  
   747  	var dialConn net.Conn
   748  
   749  	if protocol.TunnelProtocolUsesMeek(dialParams.TunnelProtocol) {
   750  
   751  		dialConn, err = DialMeek(
   752  			ctx,
   753  			dialParams.GetMeekConfig(),
   754  			dialParams.GetDialConfig())
   755  		if err != nil {
   756  			return nil, errors.Trace(err)
   757  		}
   758  
   759  	} else if protocol.TunnelProtocolUsesQUIC(dialParams.TunnelProtocol) {
   760  
   761  		packetConn, remoteAddr, err := NewUDPConn(
   762  			ctx,
   763  			dialParams.DirectDialAddress,
   764  			dialParams.GetDialConfig())
   765  		if err != nil {
   766  			return nil, errors.Trace(err)
   767  		}
   768  
   769  		dialConn, err = quic.Dial(
   770  			ctx,
   771  			packetConn,
   772  			remoteAddr,
   773  			dialParams.QUICDialSNIAddress,
   774  			dialParams.QUICVersion,
   775  			dialParams.QUICClientHelloSeed,
   776  			dialParams.ServerEntry.SshObfuscatedKey,
   777  			dialParams.ObfuscatedQUICPaddingSeed,
   778  			dialParams.QUICDisablePathMTUDiscovery)
   779  		if err != nil {
   780  			return nil, errors.Trace(err)
   781  		}
   782  
   783  	} else if protocol.TunnelProtocolUsesTapDance(dialParams.TunnelProtocol) {
   784  
   785  		dialConn, err = refraction.DialTapDance(
   786  			ctx,
   787  			config.EmitRefractionNetworkingLogs,
   788  			config.GetPsiphonDataDirectory(),
   789  			NewNetDialer(dialParams.GetDialConfig()),
   790  			dialParams.DirectDialAddress)
   791  		if err != nil {
   792  			return nil, errors.Trace(err)
   793  		}
   794  
   795  	} else if protocol.TunnelProtocolUsesConjure(dialParams.TunnelProtocol) {
   796  
   797  		// Specify a cache key with a scope that ensures that:
   798  		//
   799  		// (a) cached registrations aren't used across different networks, as a
   800  		// registration requires the client's public IP to match the value at time
   801  		// of registration;
   802  		//
   803  		// (b) cached registrations are associated with specific Psiphon server
   804  		// candidates, to ensure that replay will use the same phantom IP(s).
   805  		//
   806  		// This scheme allows for reuse of cached registrations on network A when a
   807  		// client roams from network A to network B and back to network A.
   808  		//
   809  		// Using the network ID as a proxy for client public IP address is a
   810  		// heurisitic: it's possible that a clients public IP address changes
   811  		// without the network ID changing, and it's not guaranteed that the client
   812  		// will be assigned the original public IP on network A; so there's some
   813  		// chance the registration cannot be reused.
   814  
   815  		diagnosticID := dialParams.ServerEntry.GetDiagnosticID()
   816  
   817  		cacheKey := dialParams.NetworkID + "-" + diagnosticID
   818  
   819  		conjureConfig := &refraction.ConjureConfig{
   820  			RegistrationCacheTTL: dialParams.ConjureCachedRegistrationTTL,
   821  			RegistrationCacheKey: cacheKey,
   822  			Transport:            dialParams.ConjureTransport,
   823  			DiagnosticID:         diagnosticID,
   824  			Logger:               NoticeCommonLogger(),
   825  		}
   826  
   827  		// Set extraFailureAction, which is invoked whenever the tunnel fails (i.e.,
   828  		// where RecordFailedTunnelStat is invoked). The action will remove any
   829  		// cached registration. When refraction.DialConjure succeeds, the underlying
   830  		// registration is cached. After refraction.DialConjure returns, it no
   831  		// longer modifies the cached state of that registration, assuming that it
   832  		// remains valid and effective. However adversarial impact on a given
   833  		// phantom IP may not become evident until after the initial TCP connection
   834  		// establishment and handshake performed by refraction.DialConjure. For
   835  		// example, it may be that the phantom dial is targeted for severe
   836  		// throttling which begins or is only evident later in the flow. Scheduling
   837  		// a call to DeleteCachedConjureRegistration allows us to invalidate the
   838  		// cached registration for a tunnel that fails later in its lifecycle.
   839  		//
   840  		// Note that extraFailureAction will retain a reference to conjureConfig for
   841  		// the lifetime of the tunnel.
   842  		extraFailureAction = func() {
   843  			refraction.DeleteCachedConjureRegistration(conjureConfig)
   844  		}
   845  
   846  		if dialParams.ConjureAPIRegistration {
   847  
   848  			// Use MeekConn to domain front Conjure API registration.
   849  			//
   850  			// ConjureAPIRegistrarFrontingSpecs are applied via
   851  			// dialParams.GetMeekConfig, and will be subject to replay.
   852  			//
   853  			// Since DialMeek will create a TLS connection immediately, and a cached
   854  			// registration may be used, we will delay initializing the MeekConn-based
   855  			// RoundTripper until we know it's needed. This is implemented by passing
   856  			// in a RoundTripper that establishes a MeekConn when RoundTrip is called.
   857  			//
   858  			// In refraction.dial we configure 0 retries for API registration requests,
   859  			// assuming it's better to let another Psiphon candidate retry, with new
   860  			// domaing fronting parameters. As such, we expect only one round trip call
   861  			// per NewHTTPRoundTripper, so, in practise, there's no performance penalty
   862  			// from establishing a new MeekConn per round trip.
   863  			//
   864  			// Performing the full DialMeek/RoundTrip operation here allows us to call
   865  			// MeekConn.Close and ensure all resources are immediately cleaned up.
   866  			roundTrip := func(request *http.Request) (*http.Response, error) {
   867  
   868  				conn, err := DialMeek(
   869  					ctx, dialParams.GetMeekConfig(), dialParams.GetDialConfig())
   870  				if err != nil {
   871  					return nil, errors.Trace(err)
   872  				}
   873  				defer conn.Close()
   874  
   875  				response, err := conn.RoundTrip(request)
   876  				if err != nil {
   877  					return nil, errors.Trace(err)
   878  				}
   879  
   880  				// Read the response into a buffer and close the response
   881  				// body, ensuring that MeekConn.Close closes all idle connections.
   882  				//
   883  				// Alternatively, we could Clone the request to set
   884  				// http.Request.Close and avoid keeping any idle connection
   885  				// open after the response body is read by gotapdance. Since
   886  				// the response body is small and since gotapdance does not
   887  				// stream the response body, we're taking this approach which
   888  				// ensures cleanup.
   889  
   890  				body, err := ioutil.ReadAll(response.Body)
   891  				_ = response.Body.Close()
   892  				if err != nil {
   893  					return nil, errors.Trace(err)
   894  				}
   895  				response.Body = io.NopCloser(bytes.NewReader(body))
   896  
   897  				return response, nil
   898  			}
   899  
   900  			conjureConfig.APIRegistrarHTTPClient = &http.Client{
   901  				Transport: common.NewHTTPRoundTripper(roundTrip),
   902  			}
   903  
   904  			conjureConfig.APIRegistrarBidirectionalURL =
   905  				dialParams.ConjureAPIRegistrarBidirectionalURL
   906  			conjureConfig.APIRegistrarDelay = dialParams.ConjureAPIRegistrarDelay
   907  
   908  		} else if dialParams.ConjureDecoyRegistration {
   909  
   910  			// The Conjure "phantom" connection is compatible with fragmentation, but
   911  			// the decoy registrar connection, like Tapdance, is not, so force it off.
   912  			// Any tunnel fragmentation metrics will refer to the "phantom" connection
   913  			// only.
   914  			conjureConfig.DecoyRegistrarDialer = NewNetDialer(
   915  				dialParams.GetDialConfig().WithoutFragmentor())
   916  			conjureConfig.DecoyRegistrarWidth = dialParams.ConjureDecoyRegistrarWidth
   917  			conjureConfig.DecoyRegistrarDelay = dialParams.ConjureDecoyRegistrarDelay
   918  		}
   919  
   920  		dialConn, err = refraction.DialConjure(
   921  			ctx,
   922  			config.EmitRefractionNetworkingLogs,
   923  			config.GetPsiphonDataDirectory(),
   924  			NewNetDialer(dialParams.GetDialConfig()),
   925  			dialParams.DirectDialAddress,
   926  			conjureConfig)
   927  		if err != nil {
   928  			return nil, errors.Trace(err)
   929  		}
   930  
   931  	} else {
   932  
   933  		dialConn, err = DialTCP(
   934  			ctx,
   935  			dialParams.DirectDialAddress,
   936  			dialParams.GetDialConfig())
   937  		if err != nil {
   938  			return nil, errors.Trace(err)
   939  		}
   940  	}
   941  
   942  	// Some conns report additional metrics. fragmentor.Conns report
   943  	// fragmentor configs.
   944  	if metricsSource, ok := dialConn.(common.MetricsSource); ok {
   945  		dialParams.DialConnMetrics = metricsSource
   946  	}
   947  
   948  	if noticeMetricsSource, ok := dialConn.(common.NoticeMetricsSource); ok {
   949  		dialParams.DialConnNoticeMetrics = noticeMetricsSource
   950  	}
   951  
   952  	// If dialConn is not a Closer, tunnel failure detection may be slower
   953  	if _, ok := dialConn.(common.Closer); !ok {
   954  		NoticeWarning("tunnel.dialTunnel: dialConn is not a Closer")
   955  	}
   956  
   957  	cleanupConn := dialConn
   958  	defer func() {
   959  		// Cleanup on error
   960  		if cleanupConn != nil {
   961  			cleanupConn.Close()
   962  		}
   963  	}()
   964  
   965  	monitoringStartTime := time.Now()
   966  	monitoredConn := common.NewBurstMonitoredConn(
   967  		dialConn,
   968  		false,
   969  		burstUpstreamTargetBytes, burstUpstreamDeadline,
   970  		burstDownstreamTargetBytes, burstDownstreamDeadline)
   971  
   972  	// Apply throttling (if configured)
   973  	throttledConn := common.NewThrottledConn(
   974  		monitoredConn,
   975  		rateLimits)
   976  
   977  	// Add obfuscated SSH layer
   978  	var sshConn net.Conn = throttledConn
   979  	if protocol.TunnelProtocolUsesObfuscatedSSH(dialParams.TunnelProtocol) {
   980  		obfuscatedSSHConn, err := obfuscator.NewClientObfuscatedSSHConn(
   981  			throttledConn,
   982  			dialParams.ServerEntry.SshObfuscatedKey,
   983  			dialParams.ObfuscatorPaddingSeed,
   984  			&obfuscatedSSHMinPadding,
   985  			&obfuscatedSSHMaxPadding)
   986  		if err != nil {
   987  			return nil, errors.Trace(err)
   988  		}
   989  		sshConn = obfuscatedSSHConn
   990  		dialParams.ObfuscatedSSHConnMetrics = obfuscatedSSHConn
   991  	}
   992  
   993  	// Now establish the SSH session over the conn transport
   994  	expectedPublicKey, err := base64.StdEncoding.DecodeString(
   995  		dialParams.ServerEntry.SshHostKey)
   996  	if err != nil {
   997  		return nil, errors.Trace(err)
   998  	}
   999  	sshCertChecker := &ssh.CertChecker{
  1000  		IsHostAuthority: func(auth ssh.PublicKey, address string) bool {
  1001  			// Psiphon servers do not currently use SSH certificates. This CertChecker
  1002  			// code path may still be hit if a client attempts to connect using an
  1003  			// obsolete server entry.
  1004  			return false
  1005  		},
  1006  		HostKeyFallback: func(addr string, remote net.Addr, publicKey ssh.PublicKey) error {
  1007  			if !bytes.Equal(expectedPublicKey, publicKey.Marshal()) {
  1008  				return errors.TraceNew("unexpected host public key")
  1009  			}
  1010  			return nil
  1011  		},
  1012  	}
  1013  
  1014  	sshPasswordPayload := &protocol.SSHPasswordPayload{
  1015  		SessionId:          config.SessionID,
  1016  		SshPassword:        dialParams.ServerEntry.SshPassword,
  1017  		ClientCapabilities: []string{protocol.CLIENT_CAPABILITY_SERVER_REQUESTS},
  1018  	}
  1019  
  1020  	payload, err := json.Marshal(sshPasswordPayload)
  1021  	if err != nil {
  1022  		return nil, errors.Trace(err)
  1023  	}
  1024  
  1025  	sshClientConfig := &ssh.ClientConfig{
  1026  		User: dialParams.ServerEntry.SshUsername,
  1027  		Auth: []ssh.AuthMethod{
  1028  			ssh.Password(string(payload)),
  1029  		},
  1030  		HostKeyCallback: sshCertChecker.CheckHostKey,
  1031  		ClientVersion:   dialParams.SSHClientVersion,
  1032  	}
  1033  
  1034  	sshClientConfig.KEXPRNGSeed = dialParams.SSHKEXSeed
  1035  
  1036  	if protocol.TunnelProtocolUsesObfuscatedSSH(dialParams.TunnelProtocol) {
  1037  		if config.ObfuscatedSSHAlgorithms != nil {
  1038  			sshClientConfig.KeyExchanges = []string{config.ObfuscatedSSHAlgorithms[0]}
  1039  			sshClientConfig.Ciphers = []string{config.ObfuscatedSSHAlgorithms[1]}
  1040  			sshClientConfig.MACs = []string{config.ObfuscatedSSHAlgorithms[2]}
  1041  			sshClientConfig.HostKeyAlgorithms = []string{config.ObfuscatedSSHAlgorithms[3]}
  1042  
  1043  		} else {
  1044  			// With Encrypt-then-MAC hash algorithms, packet length is
  1045  			// transmitted in plaintext, which aids in traffic analysis.
  1046  			//
  1047  			// TUNNEL_PROTOCOL_SSH is excepted since its KEX appears in plaintext,
  1048  			// and the protocol is intended to look like SSH on the wire.
  1049  			sshClientConfig.NoEncryptThenMACHash = true
  1050  		}
  1051  	} else {
  1052  		// For TUNNEL_PROTOCOL_SSH only, the server is expected to randomize
  1053  		// its KEX; setting PeerKEXPRNGSeed will ensure successful negotiation
  1054  		// betweem two randomized KEXes.
  1055  		if dialParams.ServerEntry.SshObfuscatedKey != "" {
  1056  			sshClientConfig.PeerKEXPRNGSeed, err = protocol.DeriveSSHServerKEXPRNGSeed(
  1057  				dialParams.ServerEntry.SshObfuscatedKey)
  1058  			if err != nil {
  1059  				return nil, errors.Trace(err)
  1060  			}
  1061  		}
  1062  	}
  1063  
  1064  	// The ssh session establishment (via ssh.NewClientConn) is wrapped
  1065  	// in a timeout to ensure it won't hang. We've encountered firewalls
  1066  	// that allow the TCP handshake to complete but then send a RST to the
  1067  	// server-side and nothing to the client-side, and if that happens
  1068  	// while ssh.NewClientConn is reading, it may wait forever. The timeout
  1069  	// closes the conn, which interrupts it.
  1070  	// Note: TCP handshake timeouts are provided by TCPConn, and session
  1071  	// timeouts *after* ssh establishment are provided by the ssh keep alive
  1072  	// in operate tunnel.
  1073  
  1074  	type sshNewClientResult struct {
  1075  		sshClient           *ssh.Client
  1076  		sshRequests         <-chan *ssh.Request
  1077  		livenessTestMetrics *livenessTestMetrics
  1078  		err                 error
  1079  	}
  1080  
  1081  	resultChannel := make(chan sshNewClientResult)
  1082  
  1083  	// Call NewClientConn in a goroutine, as it blocks on SSH handshake network
  1084  	// operations, and would block canceling or shutdown. If the parent context
  1085  	// is canceled, close the net.Conn underlying SSH, which will interrupt the
  1086  	// SSH handshake that may be blocking NewClientConn.
  1087  
  1088  	go func() {
  1089  		// The following is adapted from ssh.Dial(), here using a custom conn
  1090  		// The sshAddress is passed through to host key verification callbacks; we don't use it.
  1091  		sshAddress := ""
  1092  		sshClientConn, sshChannels, sshRequests, err := ssh.NewClientConn(
  1093  			sshConn, sshAddress, sshClientConfig)
  1094  
  1095  		var sshClient *ssh.Client
  1096  		var metrics *livenessTestMetrics
  1097  
  1098  		if err == nil {
  1099  
  1100  			// sshRequests is handled by operateTunnel.
  1101  			// ssh.NewClient also expects to handle the sshRequests
  1102  			// value from ssh.NewClientConn and will spawn a goroutine
  1103  			// to handle the  <-chan *ssh.Request, so we must provide
  1104  			// a closed channel to ensure that goroutine halts instead
  1105  			// of hanging on a nil channel.
  1106  			noRequests := make(chan *ssh.Request)
  1107  			close(noRequests)
  1108  
  1109  			sshClient = ssh.NewClient(sshClientConn, sshChannels, noRequests)
  1110  
  1111  			if livenessTestMaxUpstreamBytes > 0 || livenessTestMaxDownstreamBytes > 0 {
  1112  
  1113  				// When configured, perform a liveness test which sends and
  1114  				// receives bytes through the tunnel to ensure the tunnel had
  1115  				// not been blocked upon or shortly after connecting. This
  1116  				// test is performed concurrently for each establishment
  1117  				// candidate before selecting a successful tunnel.
  1118  				//
  1119  				// Note that the liveness test is subject to the
  1120  				// TunnelConnectTimeout, which should be adjusted
  1121  				// accordinging.
  1122  
  1123  				metrics, err = performLivenessTest(
  1124  					sshClient,
  1125  					livenessTestMinUpstreamBytes, livenessTestMaxUpstreamBytes,
  1126  					livenessTestMinDownstreamBytes, livenessTestMaxDownstreamBytes,
  1127  					dialParams.LivenessTestSeed)
  1128  
  1129  				// Skip notice when cancelling.
  1130  				if baseCtx.Err() == nil {
  1131  					NoticeLivenessTest(
  1132  						dialParams.ServerEntry.GetDiagnosticID(), metrics, err == nil)
  1133  				}
  1134  			}
  1135  		}
  1136  
  1137  		resultChannel <- sshNewClientResult{sshClient, sshRequests, metrics, err}
  1138  	}()
  1139  
  1140  	var result sshNewClientResult
  1141  
  1142  	select {
  1143  	case result = <-resultChannel:
  1144  	case <-ctx.Done():
  1145  
  1146  		// Interrupt the goroutine and capture its error context to
  1147  		// distinguish point of failure.
  1148  		err := ctx.Err()
  1149  		sshConn.Close()
  1150  		result = <-resultChannel
  1151  		if result.err != nil {
  1152  			result.err = fmt.Errorf("%s: %s", err, result.err)
  1153  		} else {
  1154  			result.err = err
  1155  		}
  1156  	}
  1157  
  1158  	if result.err != nil {
  1159  		failedTunnelLivenessTestMetrics = result.livenessTestMetrics
  1160  		return nil, errors.Trace(result.err)
  1161  	}
  1162  
  1163  	dialSucceeded = true
  1164  
  1165  	NoticeConnectedServer(dialParams)
  1166  
  1167  	cleanupConn = nil
  1168  
  1169  	// Invoke DNS cache extension (if enabled in the resolver) now that the
  1170  	// tunnel is connected and the Psiphon server is authenticated. This
  1171  	// demonstrates that any domain name resolved to an endpoint that is or
  1172  	// is forwarded to the expected Psiphon server.
  1173  	//
  1174  	// Limitation: DNS cache extension is not implemented for Refraction
  1175  	// Networking protocols. iOS VPN, the primary use case for DNS cache
  1176  	// extension, does not enable Refraction Networking.
  1177  	if protocol.TunnelProtocolUsesFrontedMeek(dialParams.TunnelProtocol) {
  1178  		resolver := config.GetResolver()
  1179  		if resolver != nil {
  1180  			resolver.VerifyCacheExtension(dialParams.MeekFrontingDialAddress)
  1181  		}
  1182  	}
  1183  
  1184  	// When configured to do so, hold-off on activating this tunnel. This allows
  1185  	// some extra time for slower but less resource intensive protocols to
  1186  	// establish tunnels. By holding off post-connect, the client has this
  1187  	// established tunnel ready to activate in case other protocols fail to
  1188  	// establish. This hold-off phase continues to consume one connection worker.
  1189  	//
  1190  	// The network latency multiplier is not applied to HoldOffTunnelDuration,
  1191  	// as the goal is to apply a consistent hold-off range across all tunnel
  1192  	// candidates; and this avoids scaling up any delay users experience.
  1193  	//
  1194  	// The hold-off is applied regardless of whether this is the first tunnel
  1195  	// in a session or a reconnection, even to a server affinity candidate,
  1196  	// so that the advantage for other protocols persists.
  1197  
  1198  	if dialParams.HoldOffTunnelDuration > 0 {
  1199  		NoticeHoldOffTunnel(dialParams.ServerEntry.GetDiagnosticID(), dialParams.HoldOffTunnelDuration)
  1200  		common.SleepWithContext(ctx, dialParams.HoldOffTunnelDuration)
  1201  	}
  1202  
  1203  	// Note: dialConn may be used to close the underlying network connection
  1204  	// but should not be used to perform I/O as that would interfere with SSH
  1205  	// (and also bypasses throttling).
  1206  
  1207  	return &dialResult{
  1208  			dialConn:            dialConn,
  1209  			monitoringStartTime: monitoringStartTime,
  1210  			monitoredConn:       monitoredConn,
  1211  			sshClient:           result.sshClient,
  1212  			sshRequests:         result.sshRequests,
  1213  			livenessTestMetrics: result.livenessTestMetrics,
  1214  			extraFailureAction:  extraFailureAction,
  1215  		},
  1216  		nil
  1217  }
  1218  
  1219  // Fields are exported for JSON encoding in NoticeLivenessTest.
  1220  type livenessTestMetrics struct {
  1221  	Duration                string
  1222  	UpstreamBytes           int
  1223  	SentUpstreamBytes       int
  1224  	DownstreamBytes         int
  1225  	ReceivedDownstreamBytes int
  1226  }
  1227  
  1228  func performLivenessTest(
  1229  	sshClient *ssh.Client,
  1230  	minUpstreamBytes, maxUpstreamBytes int,
  1231  	minDownstreamBytes, maxDownstreamBytes int,
  1232  	livenessTestPRNGSeed *prng.Seed) (*livenessTestMetrics, error) {
  1233  
  1234  	metrics := new(livenessTestMetrics)
  1235  
  1236  	defer func(startTime time.Time) {
  1237  		metrics.Duration = time.Since(startTime).String()
  1238  	}(time.Now())
  1239  
  1240  	PRNG := prng.NewPRNGWithSeed(livenessTestPRNGSeed)
  1241  
  1242  	metrics.UpstreamBytes = PRNG.Range(minUpstreamBytes, maxUpstreamBytes)
  1243  	metrics.DownstreamBytes = PRNG.Range(minDownstreamBytes, maxDownstreamBytes)
  1244  
  1245  	request := &protocol.RandomStreamRequest{
  1246  		UpstreamBytes:   metrics.UpstreamBytes,
  1247  		DownstreamBytes: metrics.DownstreamBytes,
  1248  	}
  1249  
  1250  	extraData, err := json.Marshal(request)
  1251  	if err != nil {
  1252  		return metrics, errors.Trace(err)
  1253  	}
  1254  
  1255  	channel, requests, err := sshClient.OpenChannel(
  1256  		protocol.RANDOM_STREAM_CHANNEL_TYPE, extraData)
  1257  	if err != nil {
  1258  		return metrics, errors.Trace(err)
  1259  	}
  1260  	defer channel.Close()
  1261  
  1262  	go ssh.DiscardRequests(requests)
  1263  
  1264  	sent := 0
  1265  	received := 0
  1266  	upstream := new(sync.WaitGroup)
  1267  	var errUpstream, errDownstream error
  1268  
  1269  	if metrics.UpstreamBytes > 0 {
  1270  
  1271  		// Process streams concurrently to minimize elapsed time. This also
  1272  		// avoids a unidirectional flow burst early in the tunnel lifecycle.
  1273  
  1274  		upstream.Add(1)
  1275  		go func() {
  1276  			defer upstream.Done()
  1277  
  1278  			// In consideration of memory-constrained environments, use modest-sized copy
  1279  			// buffers since many tunnel establishment workers may run the liveness test
  1280  			// concurrently.
  1281  			var buffer [4096]byte
  1282  
  1283  			n, err := common.CopyNBuffer(channel, rand.Reader, int64(metrics.UpstreamBytes), buffer[:])
  1284  			sent = int(n)
  1285  			if err != nil {
  1286  				errUpstream = errors.Trace(err)
  1287  			}
  1288  		}()
  1289  	}
  1290  
  1291  	if metrics.DownstreamBytes > 0 {
  1292  		var buffer [4096]byte
  1293  		n, err := common.CopyNBuffer(ioutil.Discard, channel, int64(metrics.DownstreamBytes), buffer[:])
  1294  		received = int(n)
  1295  		if err != nil {
  1296  			errDownstream = errors.Trace(err)
  1297  		}
  1298  	}
  1299  
  1300  	upstream.Wait()
  1301  	metrics.SentUpstreamBytes = sent
  1302  	metrics.ReceivedDownstreamBytes = received
  1303  
  1304  	if errUpstream != nil {
  1305  		return metrics, errUpstream
  1306  	} else if errDownstream != nil {
  1307  		return metrics, errDownstream
  1308  	}
  1309  
  1310  	return metrics, nil
  1311  }
  1312  
  1313  // operateTunnel monitors the health of the tunnel and performs
  1314  // periodic work.
  1315  //
  1316  // BytesTransferred and TotalBytesTransferred notices are emitted
  1317  // for live reporting and diagnostics reporting, respectively.
  1318  //
  1319  // Status requests are sent to the Psiphon API to report bytes
  1320  // transferred.
  1321  //
  1322  // Periodic SSH keep alive packets are sent to ensure the underlying
  1323  // TCP connection isn't terminated by NAT, or other network
  1324  // interference -- or test if it has been terminated while the device
  1325  // has been asleep. When a keep alive times out, the tunnel is
  1326  // considered failed.
  1327  //
  1328  // An immediate SSH keep alive "probe" is sent to test the tunnel and
  1329  // server responsiveness when a port forward failure is detected: a
  1330  // failed dial or failed read/write. This keep alive has a shorter
  1331  // timeout.
  1332  //
  1333  // Note that port forward failures may be due to non-failure conditions.
  1334  // For example, when the user inputs an invalid domain name and
  1335  // resolution is done by the ssh server; or trying to connect to a
  1336  // non-white-listed port; and the error message in these cases is not
  1337  // distinguishable from a a true server error (a common error message,
  1338  // "ssh: rejected: administratively prohibited (open failed)", may be
  1339  // returned for these cases but also if the server has run out of
  1340  // ephemeral ports, for example).
  1341  //
  1342  // SSH keep alives are not sent when the tunnel has been recently
  1343  // active (not only does tunnel activity obviate the necessity of a keep
  1344  // alive, testing has shown that keep alives may time out for "busy"
  1345  // tunnels, especially over meek protocol and other high latency
  1346  // conditions).
  1347  //
  1348  // "Recently active" is defined has having received payload bytes. Sent
  1349  // bytes are not considered as testing has shown bytes may appear to
  1350  // send when certain NAT devices have interfered with the tunnel, while
  1351  // no bytes are received. In a pathological case, with DNS implemented
  1352  // as tunneled UDP, a browser may wait excessively for a domain name to
  1353  // resolve, while no new port forward is attempted which would otherwise
  1354  // result in a tunnel failure detection.
  1355  //
  1356  // TODO: change "recently active" to include having received any
  1357  // SSH protocol messages from the server, not just user payload?
  1358  func (tunnel *Tunnel) operateTunnel(tunnelOwner TunnelOwner) {
  1359  	defer tunnel.operateWaitGroup.Done()
  1360  
  1361  	now := time.Now()
  1362  	lastBytesReceivedTime := now
  1363  	lastTotalBytesTransferedTime := now
  1364  	totalSent := int64(0)
  1365  	totalReceived := int64(0)
  1366  	setDialParamsSucceeded := false
  1367  
  1368  	noticeBytesTransferredTicker := time.NewTicker(1 * time.Second)
  1369  	defer noticeBytesTransferredTicker.Stop()
  1370  
  1371  	// The next status request and ssh keep alive times are picked at random,
  1372  	// from a range, to make the resulting traffic less fingerprintable,
  1373  	// Note: not using Tickers since these are not fixed time periods.
  1374  	nextStatusRequestPeriod := func() time.Duration {
  1375  		p := tunnel.getCustomParameters()
  1376  		return prng.Period(
  1377  			p.Duration(parameters.PsiphonAPIStatusRequestPeriodMin),
  1378  			p.Duration(parameters.PsiphonAPIStatusRequestPeriodMax))
  1379  	}
  1380  
  1381  	statsTimer := time.NewTimer(nextStatusRequestPeriod())
  1382  	defer statsTimer.Stop()
  1383  
  1384  	// Schedule an almost-immediate status request to deliver any unreported
  1385  	// persistent stats.
  1386  	unreported := CountUnreportedPersistentStats()
  1387  	if unreported > 0 {
  1388  		NoticeInfo("Unreported persistent stats: %d", unreported)
  1389  		p := tunnel.getCustomParameters()
  1390  		statsTimer.Reset(
  1391  			prng.Period(
  1392  				p.Duration(parameters.PsiphonAPIStatusRequestShortPeriodMin),
  1393  				p.Duration(parameters.PsiphonAPIStatusRequestShortPeriodMax)))
  1394  	}
  1395  
  1396  	nextSshKeepAlivePeriod := func() time.Duration {
  1397  		p := tunnel.getCustomParameters()
  1398  		return prng.Period(
  1399  			p.Duration(parameters.SSHKeepAlivePeriodMin),
  1400  			p.Duration(parameters.SSHKeepAlivePeriodMax))
  1401  	}
  1402  
  1403  	// TODO: don't initialize timer when config.DisablePeriodicSshKeepAlive is set
  1404  	sshKeepAliveTimer := time.NewTimer(nextSshKeepAlivePeriod())
  1405  	if tunnel.config.DisablePeriodicSshKeepAlive {
  1406  		sshKeepAliveTimer.Stop()
  1407  	} else {
  1408  		defer sshKeepAliveTimer.Stop()
  1409  	}
  1410  
  1411  	// Perform network requests in separate goroutines so as not to block
  1412  	// other operations.
  1413  	requestsWaitGroup := new(sync.WaitGroup)
  1414  
  1415  	requestsWaitGroup.Add(1)
  1416  	signalStatusRequest := make(chan struct{})
  1417  	go func() {
  1418  		defer requestsWaitGroup.Done()
  1419  		for range signalStatusRequest {
  1420  			sendStats(tunnel)
  1421  		}
  1422  	}()
  1423  
  1424  	requestsWaitGroup.Add(1)
  1425  	signalPeriodicSshKeepAlive := make(chan time.Duration)
  1426  	sshKeepAliveError := make(chan error, 1)
  1427  	go func() {
  1428  		defer requestsWaitGroup.Done()
  1429  		isFirstPeriodicKeepAlive := true
  1430  		for timeout := range signalPeriodicSshKeepAlive {
  1431  			bytesUp := atomic.LoadInt64(&totalSent)
  1432  			bytesDown := atomic.LoadInt64(&totalReceived)
  1433  			err := tunnel.sendSshKeepAlive(
  1434  				isFirstPeriodicKeepAlive, false, timeout, bytesUp, bytesDown)
  1435  			if err != nil {
  1436  				select {
  1437  				case sshKeepAliveError <- err:
  1438  				default:
  1439  				}
  1440  			}
  1441  			isFirstPeriodicKeepAlive = false
  1442  		}
  1443  	}()
  1444  
  1445  	// Probe-type SSH keep alives have a distinct send worker and may be sent
  1446  	// concurrently, to ensure a long period keep alive timeout doesn't delay
  1447  	// failed tunnel detection.
  1448  
  1449  	requestsWaitGroup.Add(1)
  1450  	signalProbeSshKeepAlive := make(chan time.Duration)
  1451  	go func() {
  1452  		defer requestsWaitGroup.Done()
  1453  		for timeout := range signalProbeSshKeepAlive {
  1454  			bytesUp := atomic.LoadInt64(&totalSent)
  1455  			bytesDown := atomic.LoadInt64(&totalReceived)
  1456  			err := tunnel.sendSshKeepAlive(
  1457  				false, true, timeout, bytesUp, bytesDown)
  1458  			if err != nil {
  1459  				select {
  1460  				case sshKeepAliveError <- err:
  1461  				default:
  1462  				}
  1463  			}
  1464  		}
  1465  	}()
  1466  
  1467  	shutdown := false
  1468  	var err error
  1469  	for !shutdown && err == nil {
  1470  		select {
  1471  		case <-noticeBytesTransferredTicker.C:
  1472  			sent, received := transferstats.ReportRecentBytesTransferredForServer(
  1473  				tunnel.dialParams.ServerEntry.IpAddress)
  1474  
  1475  			if received > 0 {
  1476  				lastBytesReceivedTime = time.Now()
  1477  			}
  1478  
  1479  			bytesUp := atomic.AddInt64(&totalSent, sent)
  1480  			bytesDown := atomic.AddInt64(&totalReceived, received)
  1481  
  1482  			p := tunnel.getCustomParameters()
  1483  			noticePeriod := p.Duration(parameters.TotalBytesTransferredNoticePeriod)
  1484  			doEmitMemoryMetrics := p.Bool(parameters.TotalBytesTransferredEmitMemoryMetrics)
  1485  			replayTargetUpstreamBytes := p.Int(parameters.ReplayTargetUpstreamBytes)
  1486  			replayTargetDownstreamBytes := p.Int(parameters.ReplayTargetDownstreamBytes)
  1487  			replayTargetTunnelDuration := p.Duration(parameters.ReplayTargetTunnelDuration)
  1488  
  1489  			if lastTotalBytesTransferedTime.Add(noticePeriod).Before(time.Now()) {
  1490  				NoticeTotalBytesTransferred(
  1491  					tunnel.dialParams.ServerEntry.GetDiagnosticID(), bytesUp, bytesDown)
  1492  				if doEmitMemoryMetrics {
  1493  					emitMemoryMetrics()
  1494  				}
  1495  				lastTotalBytesTransferedTime = time.Now()
  1496  			}
  1497  
  1498  			// Only emit the frequent BytesTransferred notice when tunnel is not idle.
  1499  			if tunnel.config.EmitBytesTransferred && (sent > 0 || received > 0) {
  1500  				NoticeBytesTransferred(
  1501  					tunnel.dialParams.ServerEntry.GetDiagnosticID(), sent, received)
  1502  			}
  1503  
  1504  			// Once the tunnel has connected, activated, successfully transmitted the
  1505  			// targeted number of bytes, and been up for the targeted duration
  1506  			// (measured from the end of establishment), store its dial parameters for
  1507  			// subsequent replay.
  1508  			//
  1509  			// Even when target bytes and duration are all 0, the tunnel must remain up
  1510  			// for at least 1 second due to use of noticeBytesTransferredTicker; for
  1511  			// the same reason the granularity of ReplayTargetTunnelDuration is
  1512  			// seconds.
  1513  			if !setDialParamsSucceeded &&
  1514  				bytesUp >= int64(replayTargetUpstreamBytes) &&
  1515  				bytesDown >= int64(replayTargetDownstreamBytes) &&
  1516  				time.Since(tunnel.establishedTime) >= replayTargetTunnelDuration {
  1517  
  1518  				tunnel.dialParams.Succeeded()
  1519  				setDialParamsSucceeded = true
  1520  			}
  1521  
  1522  		case <-statsTimer.C:
  1523  			select {
  1524  			case signalStatusRequest <- struct{}{}:
  1525  			default:
  1526  			}
  1527  			statsTimer.Reset(nextStatusRequestPeriod())
  1528  
  1529  		case <-sshKeepAliveTimer.C:
  1530  			p := tunnel.getCustomParameters()
  1531  			inactivePeriod := p.Duration(parameters.SSHKeepAlivePeriodicInactivePeriod)
  1532  			if lastBytesReceivedTime.Add(inactivePeriod).Before(time.Now()) {
  1533  				timeout := p.Duration(parameters.SSHKeepAlivePeriodicTimeout)
  1534  				select {
  1535  				case signalPeriodicSshKeepAlive <- timeout:
  1536  				default:
  1537  				}
  1538  			}
  1539  			sshKeepAliveTimer.Reset(nextSshKeepAlivePeriod())
  1540  
  1541  		case <-tunnel.signalPortForwardFailure:
  1542  			// Note: no mutex on portForwardFailureTotal; only referenced here
  1543  			tunnel.totalPortForwardFailures++
  1544  			NoticeInfo("port forward failures for %s: %d",
  1545  				tunnel.dialParams.ServerEntry.GetDiagnosticID(),
  1546  				tunnel.totalPortForwardFailures)
  1547  
  1548  			// If the underlying Conn has closed (meek and other plugin protocols may
  1549  			// close themselves in certain error conditions), the tunnel has certainly
  1550  			// failed. Otherwise, probe with an SSH keep alive.
  1551  			//
  1552  			// TODO: the IsClosed case omits the failed tunnel logging and reset
  1553  			// actions performed by sendSshKeepAlive. Should self-closing protocols
  1554  			// perform these actions themselves?
  1555  
  1556  			if tunnel.conn.IsClosed() {
  1557  				err = errors.TraceNew("underlying conn is closed")
  1558  			} else {
  1559  				p := tunnel.getCustomParameters()
  1560  				inactivePeriod := p.Duration(parameters.SSHKeepAliveProbeInactivePeriod)
  1561  				if lastBytesReceivedTime.Add(inactivePeriod).Before(time.Now()) {
  1562  					timeout := p.Duration(parameters.SSHKeepAliveProbeTimeout)
  1563  					select {
  1564  					case signalProbeSshKeepAlive <- timeout:
  1565  					default:
  1566  					}
  1567  				}
  1568  				if !tunnel.config.DisablePeriodicSshKeepAlive {
  1569  					sshKeepAliveTimer.Reset(nextSshKeepAlivePeriod())
  1570  				}
  1571  
  1572  			}
  1573  
  1574  		case err = <-sshKeepAliveError:
  1575  
  1576  		case serverRequest := <-tunnel.sshServerRequests:
  1577  			if serverRequest != nil {
  1578  				err := HandleServerRequest(tunnelOwner, tunnel, serverRequest.Type, serverRequest.Payload)
  1579  				if err == nil {
  1580  					serverRequest.Reply(true, nil)
  1581  				} else {
  1582  					NoticeWarning("HandleServerRequest for %s failed: %s", serverRequest.Type, err)
  1583  					serverRequest.Reply(false, nil)
  1584  
  1585  				}
  1586  			}
  1587  
  1588  		case <-tunnel.operateCtx.Done():
  1589  			shutdown = true
  1590  		}
  1591  	}
  1592  
  1593  	close(signalPeriodicSshKeepAlive)
  1594  	close(signalProbeSshKeepAlive)
  1595  	close(signalStatusRequest)
  1596  	requestsWaitGroup.Wait()
  1597  
  1598  	// Capture bytes transferred since the last noticeBytesTransferredTicker tick
  1599  	sent, received := transferstats.ReportRecentBytesTransferredForServer(
  1600  		tunnel.dialParams.ServerEntry.IpAddress)
  1601  	bytesUp := atomic.AddInt64(&totalSent, sent)
  1602  	bytesDown := atomic.AddInt64(&totalReceived, received)
  1603  
  1604  	// Always emit a final NoticeTotalBytesTransferred
  1605  	NoticeTotalBytesTransferred(
  1606  		tunnel.dialParams.ServerEntry.GetDiagnosticID(), bytesUp, bytesDown)
  1607  
  1608  	if err == nil {
  1609  
  1610  		NoticeInfo("shutdown operate tunnel")
  1611  
  1612  		// This commanded shutdown case is initiated by Tunnel.Close, which will
  1613  		// wait up to parameters.TunnelOperateShutdownTimeout to allow the following
  1614  		// requests to complete.
  1615  
  1616  		// Send a final status request in order to report any outstanding persistent
  1617  		// stats and domain bytes transferred as soon as possible.
  1618  
  1619  		sendStats(tunnel)
  1620  
  1621  		// The controller connectedReporter may have initiated a connected request
  1622  		// concurrent to this commanded shutdown. SetInFlightConnectedRequest
  1623  		// ensures that a connected request doesn't start after the commanded
  1624  		// shutdown. AwaitInFlightConnectedRequest blocks until any in flight
  1625  		// request completes or is aborted after TunnelOperateShutdownTimeout.
  1626  		//
  1627  		// As any connected request is performed by a concurrent goroutine,
  1628  		// sendStats is called first and AwaitInFlightConnectedRequest second.
  1629  
  1630  		tunnel.AwaitInFlightConnectedRequest()
  1631  
  1632  	} else {
  1633  
  1634  		NoticeWarning("operate tunnel error for %s: %s",
  1635  			tunnel.dialParams.ServerEntry.GetDiagnosticID(), err)
  1636  
  1637  		tunnelOwner.SignalTunnelFailure(tunnel)
  1638  	}
  1639  }
  1640  
  1641  // sendSshKeepAlive is a helper which sends a keepalive@openssh.com request
  1642  // on the specified SSH connections and returns true of the request succeeds
  1643  // within a specified timeout. If the request fails, the associated conn is
  1644  // closed, which will terminate the associated tunnel.
  1645  func (tunnel *Tunnel) sendSshKeepAlive(
  1646  	isFirstPeriodicKeepAlive bool,
  1647  	isProbeKeepAlive bool,
  1648  	timeout time.Duration,
  1649  	bytesUp int64,
  1650  	bytesDown int64) error {
  1651  
  1652  	p := tunnel.getCustomParameters()
  1653  
  1654  	// Random padding to frustrate fingerprinting.
  1655  	request := prng.Padding(
  1656  		p.Int(parameters.SSHKeepAlivePaddingMinBytes),
  1657  		p.Int(parameters.SSHKeepAlivePaddingMaxBytes))
  1658  
  1659  	speedTestSample := isFirstPeriodicKeepAlive
  1660  	if !speedTestSample {
  1661  		speedTestSample = p.WeightedCoinFlip(
  1662  			parameters.SSHKeepAliveSpeedTestSampleProbability)
  1663  	}
  1664  
  1665  	networkConnectivityPollPeriod := p.Duration(
  1666  		parameters.SSHKeepAliveNetworkConnectivityPollingPeriod)
  1667  
  1668  	resetOnFailure := p.WeightedCoinFlip(
  1669  		parameters.SSHKeepAliveResetOnFailureProbability)
  1670  
  1671  	p.Close()
  1672  
  1673  	// Note: there is no request context since SSH requests cannot be interrupted
  1674  	// directly. Closing the tunnel will interrupt the request. A timeout is set
  1675  	// to unblock this function, but the goroutine may not exit until the tunnel
  1676  	// is closed.
  1677  
  1678  	// Use a buffer of 1 as there are two senders and only one guaranteed receive.
  1679  	errChannel := make(chan error, 1)
  1680  
  1681  	afterFunc := time.AfterFunc(timeout, func() {
  1682  		errChannel <- errors.TraceNew("timed out")
  1683  	})
  1684  	defer afterFunc.Stop()
  1685  
  1686  	go func() {
  1687  
  1688  		startTime := time.Now()
  1689  
  1690  		// Note: reading a reply is important for last-received-time tunnel
  1691  		// duration calculation.
  1692  		requestOk, response, err := tunnel.sshClient.SendRequest(
  1693  			"keepalive@openssh.com", true, request)
  1694  
  1695  		elapsedTime := time.Since(startTime)
  1696  
  1697  		errChannel <- err
  1698  
  1699  		success := (err == nil && requestOk)
  1700  
  1701  		if success && isProbeKeepAlive {
  1702  			NoticeInfo("Probe SSH keep-alive RTT: %s", elapsedTime)
  1703  		}
  1704  
  1705  		// Record the keep alive round trip as a speed test sample. The first
  1706  		// periodic keep alive is always recorded, as many tunnels are short-lived
  1707  		// and we want to ensure that some data is gathered. Subsequent keep alives
  1708  		// are recorded with some configurable probability, which, considering that
  1709  		// only the last SpeedTestMaxSampleCount samples are retained, enables
  1710  		// tuning the sampling frequency.
  1711  
  1712  		if success && speedTestSample {
  1713  
  1714  			err = tactics.AddSpeedTestSample(
  1715  				tunnel.config.GetParameters(),
  1716  				GetTacticsStorer(tunnel.config),
  1717  				tunnel.config.GetNetworkID(),
  1718  				tunnel.dialParams.ServerEntry.Region,
  1719  				tunnel.dialParams.TunnelProtocol,
  1720  				elapsedTime,
  1721  				request,
  1722  				response)
  1723  			if err != nil {
  1724  				NoticeWarning("AddSpeedTestSample failed: %s", errors.Trace(err))
  1725  			}
  1726  		}
  1727  	}()
  1728  
  1729  	// While awaiting the response, poll the network connectivity state. If there
  1730  	// is network connectivity, on the same network, for the entire duration of
  1731  	// the keep alive request and the request fails, record a failed tunnel
  1732  	// event.
  1733  	//
  1734  	// The network connectivity heuristic is intended to reduce the number of
  1735  	// failed tunnels reported due to routine situations such as varying mobile
  1736  	// network conditions. The polling may produce false positives if the network
  1737  	// goes down and up between polling periods, or changes to a new network and
  1738  	// back to the previous network between polling periods.
  1739  	//
  1740  	// For platforms that don't provide a NetworkConnectivityChecker, it is
  1741  	// assumed that there is network connectivity.
  1742  	//
  1743  	// The approximate number of tunneled bytes successfully sent and received is
  1744  	// recorded in the failed tunnel event as a quality indicator.
  1745  
  1746  	ticker := time.NewTicker(networkConnectivityPollPeriod)
  1747  	defer ticker.Stop()
  1748  	continuousNetworkConnectivity := true
  1749  	networkID := tunnel.config.GetNetworkID()
  1750  
  1751  	var err error
  1752  loop:
  1753  	for {
  1754  		select {
  1755  		case err = <-errChannel:
  1756  			break loop
  1757  		case <-ticker.C:
  1758  			connectivityChecker := tunnel.config.NetworkConnectivityChecker
  1759  			if (connectivityChecker != nil &&
  1760  				connectivityChecker.HasNetworkConnectivity() != 1) ||
  1761  				(networkID != tunnel.config.GetNetworkID()) {
  1762  
  1763  				continuousNetworkConnectivity = false
  1764  			}
  1765  		}
  1766  	}
  1767  
  1768  	err = errors.Trace(err)
  1769  
  1770  	if err != nil {
  1771  		tunnel.sshClient.Close()
  1772  		tunnel.conn.Close()
  1773  
  1774  		// Don't perform log or reset actions when the keep alive may have been
  1775  		// interrupted due to shutdown.
  1776  
  1777  		isShutdown := false
  1778  		select {
  1779  		case <-tunnel.operateCtx.Done():
  1780  			isShutdown = true
  1781  		default:
  1782  		}
  1783  
  1784  		// Ensure that at most one of the two SSH keep alive workers (periodic and
  1785  		// probe) perform the log and reset actions.
  1786  
  1787  		wasHandled := atomic.CompareAndSwapInt32(&tunnel.handledSSHKeepAliveFailure, 0, 1)
  1788  
  1789  		if continuousNetworkConnectivity &&
  1790  			!isShutdown &&
  1791  			!wasHandled {
  1792  
  1793  			_ = RecordFailedTunnelStat(
  1794  				tunnel.config,
  1795  				tunnel.dialParams,
  1796  				tunnel.livenessTestMetrics,
  1797  				bytesUp,
  1798  				bytesDown,
  1799  				err)
  1800  			if tunnel.extraFailureAction != nil {
  1801  				tunnel.extraFailureAction()
  1802  			}
  1803  
  1804  			// SSHKeepAliveResetOnFailureProbability is set when a late-lifecycle
  1805  			// impaired protocol attack is suspected. With the given probability, reset
  1806  			// server affinity and replay parameters for this server to avoid
  1807  			// continuously reconnecting to the server and/or using the same protocol
  1808  			// and dial parameters.
  1809  
  1810  			if resetOnFailure {
  1811  				NoticeInfo("Delete dial parameters for %s", tunnel.dialParams.ServerEntry.GetDiagnosticID())
  1812  				err := DeleteDialParameters(tunnel.dialParams.ServerEntry.IpAddress, tunnel.dialParams.NetworkID)
  1813  				if err != nil {
  1814  					NoticeWarning("DeleteDialParameters failed: %s", err)
  1815  				}
  1816  				NoticeInfo("Delete server affinity for %s", tunnel.dialParams.ServerEntry.GetDiagnosticID())
  1817  				err = DeleteServerEntryAffinity(tunnel.dialParams.ServerEntry.IpAddress)
  1818  				if err != nil {
  1819  					NoticeWarning("DeleteServerEntryAffinity failed: %s", err)
  1820  				}
  1821  			}
  1822  		}
  1823  	}
  1824  
  1825  	return err
  1826  }
  1827  
  1828  // sendStats is a helper for sending session stats to the server.
  1829  func sendStats(tunnel *Tunnel) bool {
  1830  
  1831  	// Tunnel does not have a serverContext when DisableApi is set
  1832  	if tunnel.serverContext == nil {
  1833  		return true
  1834  	}
  1835  
  1836  	// Skip when tunnel is discarded
  1837  	if tunnel.IsDiscarded() {
  1838  		return true
  1839  	}
  1840  
  1841  	err := tunnel.serverContext.DoStatusRequest(tunnel)
  1842  	if err != nil {
  1843  		NoticeWarning("DoStatusRequest failed for %s: %s",
  1844  			tunnel.dialParams.ServerEntry.GetDiagnosticID(), err)
  1845  	}
  1846  
  1847  	return err == nil
  1848  }