github.com/psiphon-Labs/psiphon-tunnel-core@v2.0.28+incompatible/psiphon/common/refraction/refraction.go (about)

     1  //go:build PSIPHON_ENABLE_REFRACTION_NETWORKING
     2  // +build PSIPHON_ENABLE_REFRACTION_NETWORKING
     3  
     4  /*
     5   * Copyright (c) 2018, Psiphon Inc.
     6   * All rights reserved.
     7   *
     8   * This program is free software: you can redistribute it and/or modify
     9   * it under the terms of the GNU General Public License as published by
    10   * the Free Software Foundation, either version 3 of the License, or
    11   * (at your option) any later version.
    12   *
    13   * This program is distributed in the hope that it will be useful,
    14   * but WITHOUT ANY WARRANTY; without even the implied warranty of
    15   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    16   * GNU General Public License for more details.
    17   *
    18   * You should have received a copy of the GNU General Public License
    19   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
    20   *
    21   */
    22  
    23  /*
    24  
    25  Package refraction wraps github.com/refraction-networking/gotapdance with
    26  net.Listener and net.Conn types that provide drop-in integration with Psiphon.
    27  
    28  */
    29  package refraction
    30  
    31  import (
    32  	"context"
    33  	"crypto/sha256"
    34  	"fmt"
    35  	"io/ioutil"
    36  	"net"
    37  	"os"
    38  	"path/filepath"
    39  	"sync"
    40  	"sync/atomic"
    41  	"time"
    42  
    43  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
    44  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
    45  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/protocol"
    46  	"github.com/armon/go-proxyproto"
    47  	lrucache "github.com/cognusion/go-cache-lru"
    48  	refraction_networking_proto "github.com/refraction-networking/gotapdance/protobuf"
    49  	refraction_networking_client "github.com/refraction-networking/gotapdance/tapdance"
    50  )
    51  
const (
	// READ_PROXY_PROTOCOL_HEADER_TIMEOUT is the ProxyHeaderTimeout set on the
	// proxyproto.Listener created by Listen.
	READ_PROXY_PROTOCOL_HEADER_TIMEOUT = 5 * time.Second
	// REGISTRATION_CACHE_MAX_ENTRIES is the entry limit passed to
	// lrucache.NewWithLRU when the Conjure registration cache is created.
	REGISTRATION_CACHE_MAX_ENTRIES     = 256
)
    56  
// Enabled indicates if Refraction Networking functionality is enabled. This
// file is compiled only under the PSIPHON_ENABLE_REFRACTION_NETWORKING build
// tag, and in this build Enabled always returns true.
func Enabled() bool {
	return true
}
    61  
// Listener is a net.Listener. It is the exported wrapper type returned by
// Listen; all net.Listener methods are provided by the embedded listener.
type Listener struct {
	net.Listener
}
    66  
    67  // Listen creates a new Refraction Networking listener.
    68  //
    69  // The Refraction Networking station (TapDance or Conjure) will send the
    70  // original client address via the HAProxy proxy protocol v1,
    71  // https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt. The original
    72  // client address is read and returned by accepted conns' RemoteAddr.
    73  // RemoteAddr _must_ be called non-concurrently before calling Read on
    74  // accepted conns as the HAProxy proxy protocol header reading logic sets
    75  // SetReadDeadline and performs a Read.
    76  //
    77  // Psiphon server hosts should be configured to accept tunnel connections only
    78  // from Refraction Networking stations.
    79  func Listen(address string) (net.Listener, error) {
    80  
    81  	tcpListener, err := net.Listen("tcp", address)
    82  	if err != nil {
    83  		return nil, errors.Trace(err)
    84  	}
    85  
    86  	// Setting a timeout ensures that reading the proxy protocol
    87  	// header completes or times out and RemoteAddr will not block. See:
    88  	// https://godoc.org/github.com/armon/go-proxyproto#Conn.RemoteAddr
    89  
    90  	proxyListener := &proxyproto.Listener{
    91  		Listener:           tcpListener,
    92  		ProxyHeaderTimeout: READ_PROXY_PROTOCOL_HEADER_TIMEOUT}
    93  
    94  	stationListener := &stationListener{
    95  		proxyListener: proxyListener,
    96  	}
    97  
    98  	return &Listener{Listener: stationListener}, nil
    99  }
   100  
// stationListener uses the proxyproto.Listener SourceCheck callback to
// capture and record the direct remote address, the station address, and
// wraps accepted conns to provide station address metrics via GetMetrics.
// These metrics enable identifying which station fronted a connection, which
// is useful for network operations and troubleshooting.
//
// go-proxyproto.Conn.RemoteAddr reports the originating client IP address,
// which is geolocated and recorded for metrics. The underlying conn's remote
// address, the station address, is not accessible via the go-proxyproto API.
//
// stationListener is not safe for concurrent access.
type stationListener struct {
	// proxyListener is the wrapped PROXY protocol listener. Accept replaces
	// its SourceCheck callback on every call.
	proxyListener *proxyproto.Listener
}
   115  
   116  func (l *stationListener) Accept() (net.Conn, error) {
   117  	var stationRemoteAddr net.Addr
   118  	l.proxyListener.SourceCheck = func(addr net.Addr) (bool, error) {
   119  		stationRemoteAddr = addr
   120  		return true, nil
   121  	}
   122  	conn, err := l.proxyListener.Accept()
   123  	if err != nil {
   124  		return nil, err
   125  	}
   126  	if stationRemoteAddr == nil {
   127  		return nil, errors.TraceNew("missing station address")
   128  	}
   129  	return &stationConn{
   130  		Conn:             conn,
   131  		stationIPAddress: common.IPAddressFromAddr(stationRemoteAddr),
   132  	}, nil
   133  }
   134  
// Close closes the wrapped proxyproto.Listener and returns its error.
func (l *stationListener) Close() error {
	return l.proxyListener.Close()
}
   138  
// Addr returns the address reported by the wrapped proxyproto.Listener.
func (l *stationListener) Addr() net.Addr {
	return l.proxyListener.Addr()
}
   142  
// stationConn wraps a conn accepted by stationListener and retains the IP
// address of the directly connected peer (the station), as captured by the
// SourceCheck callback in stationListener.Accept.
type stationConn struct {
	net.Conn
	// stationIPAddress is the IP address derived from the address passed to
	// the SourceCheck callback.
	stationIPAddress string
}
   147  
   148  // IrregularTunnelError implements the common.IrregularIndicator interface.
   149  func (c *stationConn) IrregularTunnelError() error {
   150  
   151  	// We expect a PROXY protocol header, but go-proxyproto does not produce an
   152  	// error if the "PROXY " prefix is absent; instead the connection will
   153  	// proceed. To detect this case, check if the go-proxyproto RemoteAddr IP
   154  	// address matches the underlying connection IP address. When these values
   155  	// match, there was no PROXY protocol header.
   156  	//
   157  	// Limitation: the values will match if there is a PROXY protocol header
   158  	// containing the same IP address as the underlying connection. This is not
   159  	// an expected case.
   160  
   161  	if common.IPAddressFromAddr(c.RemoteAddr()) == c.stationIPAddress {
   162  		return errors.TraceNew("unexpected station IP address")
   163  	}
   164  	return nil
   165  }
   166  
   167  // GetMetrics implements the common.MetricsSource interface.
   168  func (c *stationConn) GetMetrics() common.LogFields {
   169  
   170  	logFields := make(common.LogFields)
   171  
   172  	// Ensure we don't log a potential non-station IP address.
   173  	if c.IrregularTunnelError() == nil {
   174  		logFields["station_ip_address"] = c.stationIPAddress
   175  	}
   176  
   177  	return logFields
   178  }
   179  
   180  // DialTapDance establishes a new TapDance connection to a TapDance station
   181  // specified in the config assets and forwarding through to the Psiphon server
   182  // specified by address.
   183  //
   184  // The TapDance station config assets (which are also the Conjure station
   185  // assets) are read from dataDirectory/"refraction-networking". When no config
   186  // is found, default assets are paved.
   187  //
   188  // dialer specifies the custom dialer for underlying TCP dials.
   189  //
   190  // The input ctx is expected to have a timeout for the dial.
   191  //
   192  // Limitation: the parameters emitLogs and dataDirectory are used for one-time
   193  // initialization and are ignored after the first DialTapDance/Conjure call.
   194  func DialTapDance(
   195  	ctx context.Context,
   196  	emitLogs bool,
   197  	dataDirectory string,
   198  	dialer common.NetDialer,
   199  	address string) (net.Conn, error) {
   200  
   201  	return dial(
   202  		ctx,
   203  		emitLogs,
   204  		dataDirectory,
   205  		dialer,
   206  		address,
   207  		nil)
   208  }
   209  
   210  // DialConjure establishes a new Conjure connection to a Conjure station.
   211  //
   212  // dialer specifies the custom dialer to use for phantom dials. Additional
   213  // Conjure-specific parameters are specified in conjureConfig.
   214  //
   215  // See DialTapdance comment.
   216  func DialConjure(
   217  	ctx context.Context,
   218  	emitLogs bool,
   219  	dataDirectory string,
   220  	dialer common.NetDialer,
   221  	address string,
   222  	conjureConfig *ConjureConfig) (net.Conn, error) {
   223  
   224  	return dial(
   225  		ctx,
   226  		emitLogs,
   227  		dataDirectory,
   228  		dialer,
   229  		address,
   230  		conjureConfig)
   231  }
   232  
// dial is the shared implementation behind DialTapDance and DialConjure. A
// nil conjureConfig selects TapDance; a non-nil conjureConfig selects Conjure.
//
// The input ctx must carry a deadline; otherwise an error is returned before
// any dialing is attempted. On success the returned net.Conn is a
// *refractionConn which holds the dialManager created for this dial.
func dial(
	ctx context.Context,
	emitLogs bool,
	dataDirectory string,
	dialer common.NetDialer,
	address string,
	conjureConfig *ConjureConfig) (net.Conn, error) {

	err := initRefractionNetworking(emitLogs, dataDirectory)
	if err != nil {
		return nil, errors.Trace(err)
	}

	if _, ok := ctx.Deadline(); !ok {
		return nil, errors.TraceNew("dial context has no timeout")
	}

	useConjure := conjureConfig != nil

	manager := newDialManager()

	refractionDialer := &refraction_networking_client.Dialer{
		TcpDialer:      manager.makeManagedDialer(dialer.DialContext),
		UseProxyHeader: true,
	}

	conjureCached := false
	conjureDelay := time.Duration(0)

	var conjureCachedRegistration *refraction_networking_client.ConjureReg
	var conjureRecordRegistrar *recordRegistrar

	if useConjure {

		// Our strategy is to try one registration per dial attempt: a cached
		// registration, if it exists, or API or decoy registration, as configured.
		// This assumes Psiphon establishment will try/retry many candidates as
		// required, and that the desired mix of API/decoy registrations will be
		// configured and generated. In good network conditions, internal gotapdance
		// retries (via APIRegistrar.MaxRetries or APIRegistrar.SecondaryRegistrar)
		// are unlikely to start before the Conjure dial is canceled.

		// Caching registrations reduces average Conjure dial time by often
		// eliminating the registration phase. This is especially impactful for
		// short duration tunnels, such as on mobile. Caching also reduces domain
		// fronted traffic and load on the API registrar and decoys.
		//
		// We implement a simple in-memory registration cache with the following
		// behavior:
		//
		// - If a new registration succeeds, but the overall Conjure dial is
		//   _canceled_, the registration is optimistically cached.
		// - If the Conjure phantom dial fails, any associated cached registration
		//   is discarded.
		// - A cached registration's TTL is extended upon phantom dial success.
		// - If the configured TTL changes, the cache is cleared.
		//
		// Limitations:
		// - The cache is not persistent.
		// - There is no TTL extension during a long connection.
		// - Caching a successful registration when the phantom dial is canceled may
		//   skip the necessary "delay" step (however, an immediate re-establishment
		//   to the same candidate is unlikely in this case).
		//
		// TODO:
		// - Revisit when gotapdance adds its own caching.
		// - Consider "pre-registering" Conjure when already connected with a
		//   different protocol, so a Conjure registration is available on the next
		//   establishment; in this scenario, a tunneled API registration would not
		//   require domain fronting.

		refractionDialer.DarkDecoy = true

		// The pop operation removes the registration from the cache. This
		// eliminates the possibility of concurrent candidates (with the same cache
		// key) using and modifying the same registration, a potential race
		// condition. The popped cached registration must be reinserted in the cache
		// after canceling or success, but not on phantom dial failure.

		conjureCachedRegistration = conjureRegistrationCache.pop(conjureConfig)

		if conjureCachedRegistration != nil {

			refractionDialer.DarkDecoyRegistrar = &cachedRegistrar{
				registration: conjureCachedRegistration,
			}

			conjureCached = true
			conjureDelay = 0 // report no delay

		} else if conjureConfig.APIRegistrarBidirectionalURL != "" {

			if conjureConfig.APIRegistrarHTTPClient == nil {
				// While not a guaranteed check, if the APIRegistrarHTTPClient isn't set
				// then the API registration would certainly be unfronted, resulting in a
				// fingerprintable connection leak.
				return nil, errors.TraceNew("missing APIRegistrarHTTPClient")
			}

			refractionDialer.DarkDecoyRegistrar = &refraction_networking_client.APIRegistrarBidirectional{
				Endpoint:        conjureConfig.APIRegistrarBidirectionalURL,
				ConnectionDelay: conjureConfig.APIRegistrarDelay,
				MaxRetries:      0,
				Client:          conjureConfig.APIRegistrarHTTPClient,
			}

			conjureDelay = conjureConfig.APIRegistrarDelay

		} else if conjureConfig.DecoyRegistrarDialer != nil {

			refractionDialer.DarkDecoyRegistrar = &refraction_networking_client.DecoyRegistrar{
				TcpDialer: manager.makeManagedDialer(
					conjureConfig.DecoyRegistrarDialer.DialContext),
			}

			refractionDialer.Width = conjureConfig.DecoyRegistrarWidth

			// Limitation: the decoy registration delay is not currently exposed in the
			// gotapdance API.
			conjureDelay = -1 // don't report delay

		} else {

			return nil, errors.TraceNew("no conjure registrar specified")
		}

		if conjureCachedRegistration == nil && conjureConfig.RegistrationCacheTTL != 0 {

			// Record the registration result in order to cache it.
			conjureRecordRegistrar = &recordRegistrar{
				registrar: refractionDialer.DarkDecoyRegistrar,
			}
			refractionDialer.DarkDecoyRegistrar = conjureRecordRegistrar
		}

		switch conjureConfig.Transport {
		case protocol.CONJURE_TRANSPORT_MIN_OSSH:
			refractionDialer.Transport = refraction_networking_proto.TransportType_Min
			refractionDialer.TcpDialer = newMinTransportDialer(refractionDialer.TcpDialer)
		case protocol.CONJURE_TRANSPORT_OBFS4_OSSH:
			refractionDialer.Transport = refraction_networking_proto.TransportType_Obfs4
		default:
			return nil, errors.Tracef("invalid Conjure transport: %s", conjureConfig.Transport)
		}

		if conjureCachedRegistration != nil {

			// When using a cached registration, patch its TcpDialer to use the custom
			// dialer for this dial. In the non-cached code path, gotapdance will set
			// refractionDialer.TcpDialer into a new registration.
			conjureCachedRegistration.TcpDialer = refractionDialer.TcpDialer
		}
	}

	// If the dial context is cancelled, use dialManager to interrupt
	// refractionDialer.DialContext. See dialManager comment explaining why
	// refractionDialer.DialContext may block even when the input context is
	// cancelled.
	dialComplete := make(chan struct{})
	go func() {
		select {
		case <-ctx.Done():
		case <-dialComplete:
		}
		select {
		// Prioritize the dialComplete case.
		case <-dialComplete:
			return
		default:
		}
		manager.close()
	}()

	conn, err := refractionDialer.DialContext(ctx, "tcp", address)
	close(dialComplete)

	if err != nil {
		// Call manager.close before updating cache, to synchronously shutdown dials
		// and ensure there are no further concurrent reads/writes to the recorded
		// registration before referencing it.
		manager.close()
	}

	// Cache (or put back) a successful registration. Also put back in the
	// specific error case where the phantom dial was canceled, as the
	// registration may still be valid. This operation implicitly extends the TTL
	// of a reused cached registration; we assume the Conjure station is also
	// extending the TTL by the same amount.
	//
	// Limitation: the cancel case shouldn't extend the TTL.

	if useConjure && (conjureCachedRegistration != nil || conjureRecordRegistrar != nil) {

		isCanceled := (err != nil && ctx.Err() == context.Canceled)

		if err == nil || isCanceled {

			registration := conjureCachedRegistration
			if registration == nil {
				// We assume gotapdance is no longer accessing the Registrar.
				registration = conjureRecordRegistrar.registration
			}

			// conjureRecordRegistrar.registration will be nil if there was no cached
			// registration _and_ registration didn't succeed before a cancel.
			if registration != nil {

				// Do not retain a reference to the custom dialer, as its context will not
				// be valid for future dials using this cached registration. Assumes that
				// gotapdance will no longer reference the TcpDialer now that the
				// connection is established.
				registration.TcpDialer = nil

				conjureRegistrationCache.put(conjureConfig, registration, isCanceled)
			}

		} else if conjureCachedRegistration != nil {

			conjureConfig.Logger.WithTraceFields(
				common.LogFields{
					"diagnosticID": conjureConfig.DiagnosticID,
					"reason":       "phantom dial failed",
				}).Info(
				"drop cached registration")
		}
	}

	if err != nil {
		return nil, errors.Trace(err)
	}

	// From here on, dials made through the manager (e.g. reconnections) use
	// the manager's run context instead of the establishment ctx.
	manager.startUsingRunCtx()

	refractionConn := &refractionConn{
		Conn:    conn,
		manager: manager,
	}

	if useConjure {
		// Retain these values for logging metrics.
		refractionConn.isConjure = true
		refractionConn.conjureCached = conjureCached
		refractionConn.conjureDelay = conjureDelay
		refractionConn.conjureTransport = conjureConfig.Transport
	}

	return refractionConn, nil
}
   481  
// DeleteCachedConjureRegistration removes any registration stored in the
// package-level Conjure registration cache under config.RegistrationCacheKey.
func DeleteCachedConjureRegistration(config *ConjureConfig) {
	conjureRegistrationCache.delete(config)
}
   485  
// registrationCache is an in-memory cache of Conjure registrations keyed by
// ConjureConfig.RegistrationCacheKey. All methods take mutex before touching
// TTL or cache.
type registrationCache struct {
	mutex sync.Mutex
	// TTL is the most recently seen ConjureConfig.RegistrationCacheTTL; put
	// and pop flush the cache whenever the configured value differs.
	TTL   time.Duration
	cache *lrucache.Cache
}
   491  
   492  func newRegistrationCache() *registrationCache {
   493  	return &registrationCache{
   494  		cache: lrucache.NewWithLRU(
   495  			lrucache.NoExpiration,
   496  			1*time.Minute,
   497  			REGISTRATION_CACHE_MAX_ENTRIES),
   498  	}
   499  }
   500  
   501  func (c *registrationCache) put(
   502  	config *ConjureConfig,
   503  	registration *refraction_networking_client.ConjureReg,
   504  	isCanceled bool) {
   505  
   506  	c.mutex.Lock()
   507  	defer c.mutex.Unlock()
   508  
   509  	// Clear the entire cache if the configured TTL changes to avoid retaining
   510  	// items for too long. This is expected to be an infrequent event. The
   511  	// go-cache-lru API does not offer a mechanism to inspect and adjust the TTL
   512  	// of all existing items.
   513  	if c.TTL != config.RegistrationCacheTTL {
   514  		c.cache.Flush()
   515  		c.TTL = config.RegistrationCacheTTL
   516  	}
   517  
   518  	// Drop the cached registration if another entry is found under the same key.
   519  	// Since the dial pops its entry out of the cache, finding an existing entry
   520  	// implies that another tunnel establishment candidate with the same key has
   521  	// successfully registered and connected (or canceled) in the meantime.
   522  	// Prefer that newer cached registration.
   523  	//
   524  	// For Psiphon, one scenario resulting in this condition is that the first
   525  	// dial to a given server, using a cached registration, is delayed long
   526  	// enough that a new candidate for the same server has been started and
   527  	// outpaced the first candidate.
   528  	_, found := c.cache.Get(config.RegistrationCacheKey)
   529  	if found {
   530  		config.Logger.WithTraceFields(
   531  			common.LogFields{
   532  				"diagnosticID": config.DiagnosticID,
   533  				"reason":       "existing entry found",
   534  			}).Info(
   535  			"drop cached registration")
   536  		return
   537  	}
   538  
   539  	reason := "connected"
   540  	if isCanceled {
   541  		reason = "canceled"
   542  	}
   543  
   544  	config.Logger.WithTraceFields(
   545  		common.LogFields{
   546  			"diagnosticID": config.DiagnosticID,
   547  			"cacheSize":    c.cache.ItemCount(),
   548  			"reason":       reason,
   549  		}).Info(
   550  		"put cached registration")
   551  
   552  	c.cache.Set(
   553  		config.RegistrationCacheKey,
   554  		registration,
   555  		c.TTL)
   556  }
   557  
   558  func (c *registrationCache) pop(
   559  	config *ConjureConfig) *refraction_networking_client.ConjureReg {
   560  
   561  	c.mutex.Lock()
   562  	defer c.mutex.Unlock()
   563  
   564  	// See TTL/Flush comment in put.
   565  	if c.TTL != config.RegistrationCacheTTL {
   566  		c.cache.Flush()
   567  		c.TTL = config.RegistrationCacheTTL
   568  	}
   569  
   570  	entry, found := c.cache.Get(config.RegistrationCacheKey)
   571  
   572  	config.Logger.WithTraceFields(
   573  		common.LogFields{
   574  			"diagnosticID": config.DiagnosticID,
   575  			"cacheSize":    c.cache.ItemCount(),
   576  			"found":        found,
   577  		}).Info(
   578  		"pop cached registration")
   579  
   580  	if found {
   581  		c.cache.Delete(config.RegistrationCacheKey)
   582  		return entry.(*refraction_networking_client.ConjureReg)
   583  	}
   584  
   585  	return nil
   586  }
   587  
   588  func (c *registrationCache) delete(config *ConjureConfig) {
   589  
   590  	c.mutex.Lock()
   591  	defer c.mutex.Unlock()
   592  
   593  	_, found := c.cache.Get(config.RegistrationCacheKey)
   594  
   595  	config.Logger.WithTraceFields(
   596  		common.LogFields{
   597  			"diagnosticID": config.DiagnosticID,
   598  			"found":        found,
   599  		}).Info(
   600  		"delete cached registration")
   601  
   602  	if found {
   603  		c.cache.Delete(config.RegistrationCacheKey)
   604  	}
   605  }
   606  
// conjureRegistrationCache is the package-level Conjure registration cache
// used by dial and DeleteCachedConjureRegistration.
var conjureRegistrationCache = newRegistrationCache()
   608  
// cachedRegistrar is a registrar that holds an existing registration. dial
// installs it as the DarkDecoyRegistrar when a cached registration is reused.
type cachedRegistrar struct {
	registration *refraction_networking_client.ConjureReg
}
   612  
// Register returns the held registration without performing any registration
// work; the session and context arguments are ignored.
func (r *cachedRegistrar) Register(
	_ *refraction_networking_client.ConjureSession,
	_ context.Context) (*refraction_networking_client.ConjureReg, error) {

	return r.registration, nil
}
   619  
// recordRegistrar wraps another Registrar and remembers the registration it
// produces, so that dial can cache it afterwards.
type recordRegistrar struct {
	// registrar is the wrapped Registrar that performs the registration.
	registrar    refraction_networking_client.Registrar
	// registration is the last successful result; nil until Register succeeds.
	registration *refraction_networking_client.ConjureReg
}
   624  
   625  func (r *recordRegistrar) Register(
   626  	session *refraction_networking_client.ConjureSession,
   627  	ctx context.Context) (*refraction_networking_client.ConjureReg, error) {
   628  
   629  	registration, err := r.registrar.Register(session, ctx)
   630  	if err != nil {
   631  		return nil, errors.Trace(err)
   632  	}
   633  	r.registration = registration
   634  	return registration, nil
   635  }
   636  
// minTransportConn buffers the first 32-byte random HMAC write performed by
// Conjure TransportType_Min, and prepends it to the subsequent first write
// made by OSSH. The purpose is to avoid a distinct fingerprint consisting of
// the initial TCP data packet always containing exactly 32 bytes of payload.
// The first write by OSSH will be a variable length multi-packet-sized
// sequence of random bytes.
type minTransportConn struct {
	net.Conn

	// mutex guards state, buffer and err; Write holds it for its duration.
	mutex  sync.Mutex
	// state is one of the stateMinTransport* values.
	state  int
	// buffer holds the HMAC bytes captured by the first Write.
	buffer []byte
	// err is the error returned by every Write once the failed state is
	// entered.
	err    error
}
   651  
// States of minTransportConn, advanced by minTransportConn.Write.
const (
	// No write has been made yet; the next write must be the HMAC.
	stateMinTransportInit = iota
	// The HMAC is buffered, waiting for the next write to carry it.
	stateMinTransportBufferedHMAC
	// The HMAC has been written; writes now pass straight through.
	stateMinTransportWroteHMAC
	// A write failed; all further writes return the stored error.
	stateMinTransportFailed
)
   658  
// newMinTransportConn wraps conn in a minTransportConn in its initial state,
// in which the first Write is expected to be the HMAC.
func newMinTransportConn(conn net.Conn) *minTransportConn {
	return &minTransportConn{
		Conn:  conn,
		state: stateMinTransportInit,
	}
}
   665  
   666  func (conn *minTransportConn) Write(p []byte) (int, error) {
   667  	conn.mutex.Lock()
   668  	defer conn.mutex.Unlock()
   669  
   670  	switch conn.state {
   671  	case stateMinTransportInit:
   672  		if len(p) != sha256.Size {
   673  			conn.state = stateMinTransportFailed
   674  			conn.err = errors.TraceNew("unexpected HMAC write size")
   675  			return 0, conn.err
   676  		}
   677  		conn.buffer = make([]byte, sha256.Size)
   678  		copy(conn.buffer, p)
   679  		conn.state = stateMinTransportBufferedHMAC
   680  		return sha256.Size, nil
   681  	case stateMinTransportBufferedHMAC:
   682  		conn.buffer = append(conn.buffer, p...)
   683  		n, err := conn.Conn.Write(conn.buffer)
   684  		if n < sha256.Size {
   685  			conn.state = stateMinTransportFailed
   686  			conn.err = errors.TraceNew("failed to write HMAC")
   687  			if err == nil {
   688  				// As Write must return an error when failing to write the entire buffer,
   689  				// we don't expect to hit this case.
   690  				err = conn.err
   691  			}
   692  		} else {
   693  			conn.state = stateMinTransportWroteHMAC
   694  		}
   695  		n -= sha256.Size
   696  		// Do not wrap Conn.Write errors, and do not return conn.err here.
   697  		return n, err
   698  	case stateMinTransportWroteHMAC:
   699  		return conn.Conn.Write(p)
   700  	case stateMinTransportFailed:
   701  		return 0, conn.err
   702  	default:
   703  		return 0, errors.TraceNew("unexpected state")
   704  	}
   705  }
   706  
   707  func newMinTransportDialer(dialer common.Dialer) common.Dialer {
   708  	return func(ctx context.Context, network, address string) (net.Conn, error) {
   709  		conn, err := dialer(ctx, network, address)
   710  		if err != nil {
   711  			return nil, errors.Trace(err)
   712  		}
   713  		return newMinTransportConn(conn), nil
   714  	}
   715  }
   716  
// dialManager tracks all dials performed by and dialed conns used by a
// refraction_networking_client conn. dialManager.close interrupts/closes
// all pending dials and established conns immediately. This ensures that
// blocking calls within refraction_networking_client, such as tls.Handshake,
// are interrupted:
// E.g., https://github.com/refraction-networking/gotapdance/blob/4d84655dad2e242b0af0459c31f687b12085dcca/tapdance/conn_raw.go#L307
// (...preceding SetDeadline is insufficient for immediate cancellation.)
type dialManager struct {
	// ctxMutex guards useRunCtx and initialDialCtx.
	ctxMutex       sync.Mutex
	// useRunCtx is set by startUsingRunCtx; once true, dials are bound to
	// runCtx instead of the caller-supplied context.
	useRunCtx      bool
	initialDialCtx context.Context
	// runCtx is cancelled by stopRunning, which close invokes.
	runCtx         context.Context
	stopRunning    context.CancelFunc

	// conns holds every managed conn so that close can close them all.
	conns *common.Conns
}
   733  
   734  func newDialManager() *dialManager {
   735  	runCtx, stopRunning := context.WithCancel(context.Background())
   736  	return &dialManager{
   737  		runCtx:      runCtx,
   738  		stopRunning: stopRunning,
   739  		conns:       common.NewConns(),
   740  	}
   741  }
   742  
// makeManagedDialer returns a dialer that routes every dial through
// manager.dialWithDialer using the given underlying dialer.
func (manager *dialManager) makeManagedDialer(dialer common.Dialer) common.Dialer {

	return func(ctx context.Context, network, address string) (net.Conn, error) {
		return manager.dialWithDialer(dialer, ctx, network, address)
	}
}
   749  
   750  func (manager *dialManager) dialWithDialer(
   751  	dialer common.Dialer,
   752  	ctx context.Context,
   753  	network string,
   754  	address string) (net.Conn, error) {
   755  
   756  	if network != "tcp" {
   757  		return nil, errors.Tracef("unsupported network: %s", network)
   758  	}
   759  
   760  	// The context for this dial is either:
   761  	// - ctx, during the initial refraction_networking_client.DialContext, when
   762  	//   this is Psiphon tunnel establishment.
   763  	// - manager.runCtx after the initial refraction_networking_client.Dial
   764  	//   completes, in which case this is a TapDance protocol reconnection that
   765  	//   occurs periodically for already established tunnels.
   766  
   767  	manager.ctxMutex.Lock()
   768  	if manager.useRunCtx {
   769  
   770  		// Preserve the random timeout configured by the TapDance client:
   771  		// https://github.com/refraction-networking/gotapdance/blob/4d84655dad2e242b0af0459c31f687b12085dcca/tapdance/conn_raw.go#L263
   772  		deadline, ok := ctx.Deadline()
   773  		if !ok {
   774  			return nil, errors.Tracef("unexpected nil deadline")
   775  		}
   776  		var cancelFunc context.CancelFunc
   777  		ctx, cancelFunc = context.WithDeadline(manager.runCtx, deadline)
   778  		defer cancelFunc()
   779  	}
   780  	manager.ctxMutex.Unlock()
   781  
   782  	conn, err := dialer(ctx, network, address)
   783  	if err != nil {
   784  		return nil, errors.Trace(err)
   785  	}
   786  
   787  	// Fail immediately if CloseWrite isn't available in the underlying dialed
   788  	// conn. The equivalent check in managedConn.CloseWrite isn't fatal and
   789  	// TapDance will run in a degraded state.
   790  	// Limitation: if the underlying conn _also_ passes through CloseWrite, this
   791  	// check may be insufficient.
   792  	if _, ok := conn.(common.CloseWriter); !ok {
   793  		return nil, errors.TraceNew("underlying conn is not a CloseWriter")
   794  	}
   795  
   796  	conn = &managedConn{
   797  		Conn:    conn,
   798  		manager: manager,
   799  	}
   800  
   801  	if !manager.conns.Add(conn) {
   802  		conn.Close()
   803  		return nil, errors.TraceNew("already closed")
   804  	}
   805  
   806  	return conn, nil
   807  }
   808  
   809  func (manager *dialManager) startUsingRunCtx() {
   810  	manager.ctxMutex.Lock()
   811  	manager.initialDialCtx = nil
   812  	manager.useRunCtx = true
   813  	manager.ctxMutex.Unlock()
   814  }
   815  
// close closes all conns tracked by the manager and then cancels runCtx. It
// is called from several places in dial and from refractionConn.Close, so it
// may run more than once for the same manager.
func (manager *dialManager) close() {
	manager.conns.CloseAll()
	manager.stopRunning()
}
   820  
// managedConn is a dialed conn tracked by a dialManager. It adds CloseWrite
// and removes itself from the manager's conn set when closed.
type managedConn struct {
	net.Conn
	manager *dialManager
}
   825  
   826  // CloseWrite exposes the net.TCPConn.CloseWrite() functionality
   827  // required by TapDance.
   828  func (conn *managedConn) CloseWrite() error {
   829  	if closeWriter, ok := conn.Conn.(common.CloseWriter); ok {
   830  		return closeWriter.CloseWrite()
   831  	}
   832  	return errors.TraceNew("underlying conn is not a CloseWriter")
   833  }
   834  
// Close closes the wrapped conn and schedules removal of this conn from the
// manager's conn set. The wrapped conn's Close error is returned.
func (conn *managedConn) Close() error {
	// Remove must be invoked asynchronously, as this Close may be called by
	// conns.CloseAll, leading to a reentrant lock situation.
	go conn.manager.conns.Remove(conn)
	return conn.Conn.Close()
}
   841  
// refractionConn is the net.Conn returned by dial. It holds the dialManager
// used for the dial and the values reported by GetMetrics.
type refractionConn struct {
	net.Conn
	manager  *dialManager
	// isClosed is set to 1 by Close; accessed atomically.
	isClosed int32

	// The fields below are set by dial only for Conjure connections.
	isConjure        bool
	conjureCached    bool
	// conjureDelay of -1 means the delay is not reported in metrics.
	conjureDelay     time.Duration
	conjureTransport string
}
   852  
// Close shuts down the dialManager, closes the wrapped conn, and then marks
// the refractionConn as closed. The wrapped conn's Close error is returned.
func (conn *refractionConn) Close() error {
	conn.manager.close()
	err := conn.Conn.Close()
	atomic.StoreInt32(&conn.isClosed, 1)
	return err
}
   859  
// IsClosed reports whether Close has completed on this conn.
func (conn *refractionConn) IsClosed() bool {
	return atomic.LoadInt32(&conn.isClosed) == 1
}
   863  
   864  // GetMetrics implements the common.MetricsSource interface.
   865  func (conn *refractionConn) GetMetrics() common.LogFields {
   866  	logFields := make(common.LogFields)
   867  	if conn.isConjure {
   868  
   869  		cached := "0"
   870  		if conn.conjureCached {
   871  			cached = "1"
   872  		}
   873  		logFields["conjure_cached"] = cached
   874  
   875  		if conn.conjureDelay != -1 {
   876  			logFields["conjure_delay"] = fmt.Sprintf("%d", conn.conjureDelay/time.Millisecond)
   877  		}
   878  
   879  		logFields["conjure_transport"] = conn.conjureTransport
   880  	}
   881  	return logFields
   882  }
   883  
   884  var initRefractionNetworkingOnce sync.Once
   885  
   886  func initRefractionNetworking(emitLogs bool, dataDirectory string) error {
   887  
   888  	var initErr error
   889  	initRefractionNetworkingOnce.Do(func() {
   890  
   891  		if !emitLogs {
   892  			refraction_networking_client.Logger().Out = ioutil.Discard
   893  		}
   894  
   895  		assetsDir := filepath.Join(dataDirectory, "refraction-networking")
   896  
   897  		err := os.MkdirAll(assetsDir, 0700)
   898  		if err != nil {
   899  			initErr = errors.Trace(err)
   900  			return
   901  		}
   902  
   903  		clientConfFileName := filepath.Join(assetsDir, "ClientConf")
   904  		_, err = os.Stat(clientConfFileName)
   905  		if err != nil && os.IsNotExist(err) {
   906  			err = ioutil.WriteFile(clientConfFileName, getEmbeddedClientConf(), 0644)
   907  		}
   908  		if err != nil {
   909  			initErr = errors.Trace(err)
   910  			return
   911  		}
   912  
   913  		refraction_networking_client.AssetsSetDir(assetsDir)
   914  	})
   915  
   916  	return initErr
   917  }