github.com/astaguna/popon-core@v0.0.0-20231019235610-96e42d76a5ff/psiphon/controller.go (about)

     1  /*
     2   * Copyright (c) 2015, Psiphon Inc.
     3   * All rights reserved.
     4   *
     5   * This program is free software: you can redistribute it and/or modify
     6   * it under the terms of the GNU General Public License as published by
     7   * the Free Software Foundation, either version 3 of the License, or
     8   * (at your option) any later version.
     9   *
    10   * This program is distributed in the hope that it will be useful,
    11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
    12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13   * GNU General Public License for more details.
    14   *
    15   * You should have received a copy of the GNU General Public License
    16   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17   *
    18   */
    19  
    20  // Package psiphon implements the core tunnel functionality of a Psiphon client.
    21  // The main function is RunForever, which runs a Controller that obtains lists of
    22  // servers, establishes tunnel connections, and runs local proxies through which
    23  // tunneled traffic may be sent.
    24  package psiphon
    25  
    26  import (
    27  	"context"
    28  	"fmt"
    29  	"math/rand"
    30  	"net"
    31  	"runtime"
    32  	"sync"
    33  	"sync/atomic"
    34  	"time"
    35  
    36  	"github.com/astaguna/popon-core/psiphon/common"
    37  	"github.com/astaguna/popon-core/psiphon/common/errors"
    38  	"github.com/astaguna/popon-core/psiphon/common/parameters"
    39  	"github.com/astaguna/popon-core/psiphon/common/prng"
    40  	"github.com/astaguna/popon-core/psiphon/common/protocol"
    41  	"github.com/astaguna/popon-core/psiphon/common/resolver"
    42  	"github.com/astaguna/popon-core/psiphon/common/tun"
    43  	lrucache "github.com/cognusion/go-cache-lru"
    44  )
    45  
// Controller is a tunnel lifecycle coordinator. It manages lists of servers to
// connect to; establishes and monitors tunnels; and runs local proxies which
// route traffic through the tunnels.
//
// Rough field groups (names suggest, but this chunk does not prove, the exact
// locking discipline — confirm against the methods that use them):
//   - run lifecycle: runCtx/stopRunning/runWaitGroup, set in Run;
//   - tunnel pool state: tunnels/nextTunnel/tunnelPoolSize, presumably
//     guarded by tunnelMutex;
//   - establishment state and concurrency counters, presumably guarded by
//     concurrentEstablishTunnelsMutex;
//   - untunneled dial support: untunneledDialConfig and the split tunnel
//     classification LRU cache;
//   - signal channels consumed by the worker goroutines started in Run;
//   - optional packet tunnel client/transport, set in NewController when
//     PacketTunnelTunFileDescriptor is configured.
type Controller struct {
	config                                  *Config
	runCtx                                  context.Context
	stopRunning                             context.CancelFunc
	runWaitGroup                            *sync.WaitGroup
	connectedTunnels                        chan *Tunnel
	failedTunnels                           chan *Tunnel
	tunnelMutex                             sync.Mutex
	establishedOnce                         bool
	tunnelPoolSize                          int
	tunnels                                 []*Tunnel
	nextTunnel                              int
	isEstablishing                          bool
	establishStartTime                      time.Time
	protocolSelectionConstraints            *protocolSelectionConstraints
	concurrentEstablishTunnelsMutex         sync.Mutex
	establishConnectTunnelCount             int
	concurrentEstablishTunnels              int
	concurrentIntensiveEstablishTunnels     int
	peakConcurrentEstablishTunnels          int
	peakConcurrentIntensiveEstablishTunnels int
	establishCtx                            context.Context
	stopEstablish                           context.CancelFunc
	establishWaitGroup                      *sync.WaitGroup
	establishedTunnelsCount                 int32
	candidateServerEntries                  chan *candidateServerEntry
	untunneledDialConfig                    *DialConfig
	untunneledSplitTunnelClassifications    *lrucache.Cache
	splitTunnelClassificationTTL            time.Duration
	splitTunnelClassificationMaxEntries     int
	signalFetchCommonRemoteServerList       chan struct{}
	signalFetchObfuscatedServerLists        chan struct{}
	signalDownloadUpgrade                   chan string
	signalReportServerEntries               chan *serverEntriesReportRequest
	signalReportConnected                   chan struct{}
	signalRestartEstablishing               chan struct{}
	serverAffinityDoneBroadcast             chan struct{}
	packetTunnelClient                      *tun.Client
	packetTunnelTransport                   *PacketTunnelTransport
	staggerMutex                            sync.Mutex
	resolver                                *resolver.Resolver
}
    91  
// NewController initializes a new controller.
//
// The config must already be committed (config.IsCommitted); an uncommitted
// config is rejected with an error. On success the returned Controller is
// ready to be started with Run.
func NewController(config *Config) (controller *Controller, err error) {

	if !config.IsCommitted() {
		return nil, errors.TraceNew("uncommitted config")
	}

	// Seed the global math/rand source. Needed by regen, at least.
	// NOTE(review): rand.Seed is deprecated as of Go 1.20, where the global
	// source is seeded automatically; confirm the module's Go version before
	// removing this call.
	rand.Seed(int64(time.Now().Nanosecond()))

	// The session ID for the Psiphon server API is used across all
	// tunnels established by the controller.
	NoticeSessionId(config.SessionID)

	// Attempt to apply any valid, local stored tactics. The pre-done context
	// ensures no tactics request is attempted now.
	doneContext, cancelFunc := context.WithCancel(context.Background())
	cancelFunc()
	GetTactics(doneContext, config)

	p := config.GetParameters().Get()
	splitTunnelClassificationTTL :=
		p.Duration(parameters.SplitTunnelClassificationTTL)
	splitTunnelClassificationMaxEntries :=
		p.Int(parameters.SplitTunnelClassificationMaxEntries)

	controller = &Controller{
		config:       config,
		runWaitGroup: new(sync.WaitGroup),
		// connectedTunnels and failedTunnels buffer sizes are large enough to
		// receive full pools of tunnels without blocking. Senders should not block.
		connectedTunnels: make(chan *Tunnel, MAX_TUNNEL_POOL_SIZE),
		failedTunnels:    make(chan *Tunnel, MAX_TUNNEL_POOL_SIZE),
		tunnelPoolSize:   TUNNEL_POOL_SIZE,
		tunnels:          make([]*Tunnel, 0),
		establishedOnce:  false,
		isEstablishing:   false,

		untunneledSplitTunnelClassifications: lrucache.NewWithLRU(
			splitTunnelClassificationTTL,
			1*time.Minute,
			splitTunnelClassificationMaxEntries),

		// TODO: Add a buffer of 1 so we don't miss a signal while receiver is
		// starting? Trade-off is potential back-to-back fetch remotes. As-is,
		// establish will eventually signal another fetch remote.
		signalFetchCommonRemoteServerList: make(chan struct{}),
		signalFetchObfuscatedServerLists:  make(chan struct{}),
		signalDownloadUpgrade:             make(chan string),
		signalReportConnected:             make(chan struct{}),

		// Using a buffer of 1 to ensure there's no race between the first signal
		// sent and a channel receiver initializing; a side effect is that this
		// allows 1 additional scan to enqueue while a scan is in progress, possibly
		// resulting in one unnecessary scan.
		signalReportServerEntries: make(chan *serverEntriesReportRequest, 1),

		// signalRestartEstablishing has a buffer of 1 to ensure sending the
		// signal doesn't block and receiving won't miss a signal.
		signalRestartEstablishing: make(chan struct{}, 1),
	}

	// Initialize untunneledDialConfig, used by untunneled dials including
	// remote server list and upgrade downloads.
	controller.untunneledDialConfig = &DialConfig{
		UpstreamProxyURL: controller.config.UpstreamProxyURL,
		CustomHeaders:    controller.config.CustomHeaders,
		DeviceBinder:     controller.config.deviceBinder,
		IPv6Synthesizer:  controller.config.IPv6Synthesizer,
		ResolveIP: func(ctx context.Context, hostname string) ([]net.IP, error) {
			// Note: when domain fronting would be used for untunneled dials a
			// copy of untunneledDialConfig should be used instead, which
			// redefines ResolveIP such that the corresponding fronting
			// provider ID is passed into UntunneledResolveIP to enable the use
			// of pre-resolved IPs.
			//
			// controller.resolver is nil until Run executes; presumably
			// untunneled dials only occur while Run is active — confirm
			// against callers.
			IPs, err := UntunneledResolveIP(
				ctx, controller.config, controller.resolver, hostname, "")
			if err != nil {
				return nil, errors.Trace(err)
			}
			return IPs, nil
		},
		TrustedCACertificatesFilename: controller.config.TrustedCACertificatesFilename,
	}

	if config.PacketTunnelTunFileDescriptor > 0 {

		// Run a packet tunnel client. The lifetime of the tun.Client is the
		// lifetime of the Controller, so it exists across tunnel establishments
		// and reestablishments. The PacketTunnelTransport provides a layer
		// that presents a continuously existing transport to the tun.Client;
		// it's set to use new SSH channels after new SSH tunnel establishes.

		packetTunnelTransport := NewPacketTunnelTransport()

		packetTunnelClient, err := tun.NewClient(&tun.ClientConfig{
			Logger:                    NoticeCommonLogger(),
			TunFileDescriptor:         config.PacketTunnelTunFileDescriptor,
			TransparentDNSIPv4Address: config.PacketTunnelTransparentDNSIPv4Address,
			TransparentDNSIPv6Address: config.PacketTunnelTransparentDNSIPv6Address,
			Transport:                 packetTunnelTransport,
		})
		if err != nil {
			return nil, errors.Trace(err)
		}

		controller.packetTunnelClient = packetTunnelClient
		controller.packetTunnelTransport = packetTunnelTransport
	}

	return controller, nil
}
   204  
// Run executes the controller. Run exits if a controller
// component fails or the parent context is canceled.
//
// Run starts the local proxies (unless disabled), the remote server list
// fetchers, the upgrade downloader, the server entries reporter, the
// connected reporter, the establish tunnel watcher, and the tunnel runner,
// then blocks until the run context is done. All worker goroutines are
// tracked by runWaitGroup and joined before Run returns.
func (controller *Controller) Run(ctx context.Context) {

	if controller.config.LimitCPUThreads {
		runtime.GOMAXPROCS(1)
	}

	pprofRun()

	// Ensure fresh repetitive notice state for each run, so the
	// client will always get an AvailableEgressRegions notice,
	// an initial instance of any repetitive error notice, etc.
	ResetRepetitiveNotices()

	runCtx, stopRunning := context.WithCancel(ctx)
	defer stopRunning()

	controller.runCtx = runCtx
	controller.stopRunning = stopRunning

	// Start components

	// Initialize a single resolver to be used by all dials. Sharing a single
	// resolver ensures cached results are shared, and that network state
	// query overhead is amortized over all dials. Multiple dials can resolve
	// domains concurrently.
	//
	// config.SetResolver makes this resolver available to MakeDialParameters.
	controller.resolver = NewResolver(controller.config, true)
	defer controller.resolver.Stop()
	controller.config.SetResolver(controller.resolver)

	// TODO: IPv6 support
	var listenIP string
	if controller.config.ListenInterface == "" {
		listenIP = "127.0.0.1"
	} else if controller.config.ListenInterface == "any" {
		listenIP = "0.0.0.0"
	} else {
		IPv4Address, _, err := common.GetInterfaceIPAddresses(controller.config.ListenInterface)
		if err == nil && IPv4Address == nil {
			err = fmt.Errorf("no IPv4 address for interface %s", controller.config.ListenInterface)
		}
		if err != nil {
			NoticeError("error getting listener IP: %v", errors.Trace(err))
			return
		}
		listenIP = IPv4Address.String()
	}

	// The local proxies are closed via defer when Run returns, after all
	// workers have been joined.
	if !controller.config.DisableLocalSocksProxy {
		socksProxy, err := NewSocksProxy(controller.config, controller, listenIP)
		if err != nil {
			NoticeError("error initializing local SOCKS proxy: %v", errors.Trace(err))
			return
		}
		defer socksProxy.Close()
	}

	if !controller.config.DisableLocalHTTPProxy {
		httpProxy, err := NewHttpProxy(controller.config, controller, listenIP)
		if err != nil {
			NoticeError("error initializing local HTTP proxy: %v", errors.Trace(err))
			return
		}
		defer httpProxy.Close()
	}

	if !controller.config.DisableRemoteServerListFetcher {

		if controller.config.RemoteServerListURLs != nil {
			controller.runWaitGroup.Add(1)
			go controller.remoteServerListFetcher(
				"common",
				FetchCommonRemoteServerList,
				controller.signalFetchCommonRemoteServerList)
		}

		if controller.config.ObfuscatedServerListRootURLs != nil {
			controller.runWaitGroup.Add(1)
			go controller.remoteServerListFetcher(
				"obfuscated",
				FetchObfuscatedServerLists,
				controller.signalFetchObfuscatedServerLists)
		}
	}

	if controller.config.UpgradeDownloadURLs != nil {
		controller.runWaitGroup.Add(1)
		go controller.upgradeDownloader()
	}

	controller.runWaitGroup.Add(1)
	go controller.serverEntriesReporter()

	controller.runWaitGroup.Add(1)
	go controller.connectedReporter()

	controller.runWaitGroup.Add(1)
	go controller.establishTunnelWatcher()

	controller.runWaitGroup.Add(1)
	go controller.runTunnels()

	if controller.packetTunnelClient != nil {
		controller.packetTunnelClient.Start()
	}

	// Wait while running

	<-controller.runCtx.Done()
	NoticeInfo("controller stopped")

	if controller.packetTunnelClient != nil {
		controller.packetTunnelClient.Stop()
	}

	// All workers -- runTunnels, establishment workers, and auxiliary
	// workers such as fetch remote server list and untunneled upgrade
	// download -- operate with the controller run context and will all
	// be interrupted when the run context is done.

	controller.runWaitGroup.Wait()

	NoticeInfo("exiting controller")

	NoticeExiting()
}
   334  
// SignalComponentFailure notifies the controller that an associated component has failed.
// This will terminate the controller.
//
// stopRunning cancels the run context created in Run, which unblocks Run's
// wait and interrupts all worker goroutines.
func (controller *Controller) SignalComponentFailure() {
	NoticeWarning("controller shutdown due to component failure")
	controller.stopRunning()
}
   341  
// SetDynamicConfig overrides the sponsor ID and authorizations fields of the
// Controller config with the input values. The new values will be used in the
// next tunnel connection.
//
// This is a thin delegation to Config.SetDynamicConfig.
func (controller *Controller) SetDynamicConfig(sponsorID string, authorizations []string) {
	controller.config.SetDynamicConfig(sponsorID, authorizations)
}
   348  
   349  // TerminateNextActiveTunnel terminates the active tunnel, which will initiate
   350  // establishment of a new tunnel.
   351  func (controller *Controller) TerminateNextActiveTunnel() {
   352  	tunnel := controller.getNextActiveTunnel()
   353  	if tunnel != nil {
   354  		controller.SignalTunnelFailure(tunnel)
   355  		NoticeInfo("terminated tunnel: %s", tunnel.dialParams.ServerEntry.GetDiagnosticID())
   356  	}
   357  }
   358  
// ExportExchangePayload creates a payload for client-to-client server
// connection info exchange. See the comment for psiphon.ExportExchangePayload
// for more details.
//
// This is a thin delegation to the package-level ExportExchangePayload.
func (controller *Controller) ExportExchangePayload() string {
	return ExportExchangePayload(controller.config)
}
   365  
   366  // ImportExchangePayload imports a payload generated by ExportExchangePayload.
   367  // See the comment for psiphon.ImportExchangePayload for more details about
   368  // the import.
   369  //
   370  // When the import is successful, a signal is set to trigger a restart any
   371  // establishment in progress. This will cause the newly imported server entry
   372  // to be prioritized, which it otherwise would not be in later establishment
   373  // rounds. The establishment process continues after ImportExchangePayload
   374  // returns.
   375  //
   376  // If the client already has a connected tunnel, or a tunnel connection is
   377  // established concurrently with the import, the signal has no effect as the
   378  // overall goal is establish _any_ connection.
   379  func (controller *Controller) ImportExchangePayload(payload string) bool {
   380  
   381  	// Race condition: if a new tunnel connection is established concurrently
   382  	// with the import, either that tunnel's server entry of the imported server
   383  	// entry may end up as the affinity server.
   384  
   385  	ok := ImportExchangePayload(controller.config, payload)
   386  	if !ok {
   387  		return false
   388  	}
   389  
   390  	select {
   391  	case controller.signalRestartEstablishing <- struct{}{}:
   392  	default:
   393  	}
   394  
   395  	return true
   396  }
   397  
// remoteServerListFetcher fetches an out-of-band list of server entries
// for more tunnel candidates. It fetches when signalled, with retries
// on failure.
//
// name labels notices ("common"/"obfuscated" as launched in Run); fetcher
// performs the actual fetch; signal triggers a fetch attempt. The goroutine
// exits when the controller run context is done.
func (controller *Controller) remoteServerListFetcher(
	name string,
	fetcher RemoteServerListFetcher,
	signal <-chan struct{}) {

	defer controller.runWaitGroup.Done()

	var lastFetchTime time.Time

fetcherLoop:
	for {
		// Wait for a signal before fetching
		select {
		case <-signal:
		case <-controller.runCtx.Done():
			break fetcherLoop
		}

		// Skip fetch entirely (i.e., send no request at all, even when ETag would save
		// on response size) when a recent fetch was successful

		stalePeriod := controller.config.GetParameters().Get().Duration(
			parameters.FetchRemoteServerListStalePeriod)

		if !lastFetchTime.IsZero() &&
			lastFetchTime.Add(stalePeriod).After(time.Now()) {
			continue
		}

	retryLoop:
		for attempt := 0; ; attempt++ {
			// Don't attempt to fetch while there is no network connectivity,
			// to avoid alert notice noise.
			if !WaitForNetworkConnectivity(
				controller.runCtx,
				controller.config.NetworkConnectivityChecker) {
				break fetcherLoop
			}

			// Pick any active tunnel and make the next fetch attempt. If there's
			// no active tunnel, the untunneledDialConfig will be used.
			tunnel := controller.getNextActiveTunnel()

			err := fetcher(
				controller.runCtx,
				controller.config,
				attempt,
				tunnel,
				controller.untunneledDialConfig)

			if err == nil {
				// Success: record the time so the stale-period check above
				// can skip redundant fetches, then await the next signal.
				lastFetchTime = time.Now()
				break retryLoop
			}

			NoticeWarning("failed to fetch %s remote server list: %v",
				name, errors.Trace(err))

			retryPeriod := controller.config.GetParameters().Get().Duration(
				parameters.FetchRemoteServerListRetryPeriod)

			// Wait out the retry period, but abandon the wait (and the
			// fetcher) if the controller is stopping; Stop releases the
			// timer resources on the early-exit path.
			timer := time.NewTimer(retryPeriod)
			select {
			case <-timer.C:
			case <-controller.runCtx.Done():
				timer.Stop()
				break fetcherLoop
			}
		}
	}

	NoticeInfo("exiting %s remote server list fetcher", name)
}
   474  
// upgradeDownloader makes periodic attempts to complete a client upgrade
// download. DownloadUpgrade() is resumable, so each attempt has potential for
// getting closer to completion, even in conditions where the download or
// tunnel is repeatedly interrupted.
// An upgrade download is triggered by either a handshake response indicating
// that a new version is available; or after failing to connect, in which case
// it's useful to check, out-of-band, for an upgrade with new circumvention
// capabilities.
// Once the download operation completes successfully, the downloader exits
// and is not run again: either there is not a newer version, or the upgrade
// has been downloaded and is ready to be applied.
// We're assuming that the upgrade will be applied and the entire system
// restarted before another upgrade is to be downloaded.
//
// TODO: refactor upgrade downloader and remote server list fetcher to use
// common code (including the resumable download routines).
func (controller *Controller) upgradeDownloader() {
	defer controller.runWaitGroup.Done()

	var lastDownloadTime time.Time

downloadLoop:
	for {
		// Wait for a signal before downloading. The signal carries the
		// version advertised by the handshake, or "" when triggered without
		// a handshake-advertised version.
		var handshakeVersion string
		select {
		case handshakeVersion = <-controller.signalDownloadUpgrade:
		case <-controller.runCtx.Done():
			break downloadLoop
		}

		stalePeriod := controller.config.GetParameters().Get().Duration(
			parameters.FetchUpgradeStalePeriod)

		// Unless handshake is explicitly advertising a new version, skip
		// checking entirely when a recent download was successful.
		if handshakeVersion == "" &&
			!lastDownloadTime.IsZero() &&
			lastDownloadTime.Add(stalePeriod).After(time.Now()) {
			continue
		}

	retryLoop:
		for attempt := 0; ; attempt++ {
			// Don't attempt to download while there is no network connectivity,
			// to avoid alert notice noise.
			if !WaitForNetworkConnectivity(
				controller.runCtx,
				controller.config.NetworkConnectivityChecker) {
				break downloadLoop
			}

			// Pick any active tunnel and make the next download attempt. If there's
			// no active tunnel, the untunneledDialConfig will be used.
			tunnel := controller.getNextActiveTunnel()

			err := DownloadUpgrade(
				controller.runCtx,
				controller.config,
				attempt,
				handshakeVersion,
				tunnel,
				controller.untunneledDialConfig)

			if err == nil {
				// Success: record the time so the stale-period check above
				// can skip redundant checks, then await the next signal.
				lastDownloadTime = time.Now()
				break retryLoop
			}

			NoticeWarning("failed to download upgrade: %v", errors.Trace(err))

			timeout := controller.config.GetParameters().Get().Duration(
				parameters.FetchUpgradeRetryPeriod)

			// Wait out the retry period, but abandon the wait (and the
			// downloader) if the controller is stopping.
			timer := time.NewTimer(timeout)
			select {
			case <-timer.C:
			case <-controller.runCtx.Done():
				timer.Stop()
				break downloadLoop
			}
		}
	}

	NoticeInfo("exiting upgrade downloader")
}
   561  
// serverEntriesReportRequest asks serverEntriesReporter to scan server
// entries subject to the given protocol selection constraints. When
// awaitResponse is non-nil, the reporter delivers the scan results on it and
// the requester blocks until they arrive; when nil, the report is
// informational only.
type serverEntriesReportRequest struct {
	constraints   *protocolSelectionConstraints
	awaitResponse chan *serverEntriesReportResponse
}
   566  
// serverEntriesReportResponse carries the results of a server entry scan:
// candidate counts (overall and for the initial-protocols constraint, both
// filtered by egress region and, for initialCandidatesAnyEgressRegion,
// unfiltered) plus the set of egress regions with at least one available
// candidate. On a non-nil err, the counts and regions are cleared.
type serverEntriesReportResponse struct {
	err                              error
	candidates                       int
	initialCandidates                int
	initialCandidatesAnyEgressRegion int
	availableEgressRegions           []string
}
   574  
// serverEntriesReporter performs scans over all server entries to report on
// available tunnel candidates, subject to protocol selection constraints, and
// available egress regions.
//
// Because scans may be slow, depending on the client device and server entry
// list size, serverEntriesReporter is used to perform asynchronous, background
// operations that would otherwise block establishment. This includes emitting
// diagnostic notices that are informational (CandidateServers) or which do not
// need to emit before establishment starts (AvailableEgressRegions).
//
// serverEntriesReporter also serves to combine these scans, which would
// otherwise be logically independent, due to the performance impact of scans.
//
// The underlying datastore implementation _may_ block write transactions
// while there are open read transactions. For example, bolt write
// transactions which need to re-map the data file (when the datastore grows)
// will block on open read transactions. In these scenarios, a slow scan will
// still block other operations.
//
// serverEntriesReporter runs beyond the establishment phase, since it's
// important for notices such as AvailableEgressRegions to eventually emit
// even if already established. serverEntriesReporter scans are cancellable,
// so controller shutdown is not blocked by slow scans.
//
// In some special cases, establishment cannot begin without candidate counts
// up front. In these cases only, the request contains a non-nil
// awaitResponse, a channel which is used by the requester to block until the
// scan is complete and the candidate counts are available.
func (controller *Controller) serverEntriesReporter() {
	defer controller.runWaitGroup.Done()

loop:
	for {

		var request *serverEntriesReportRequest

		select {
		case request = <-controller.signalReportServerEntries:
		case <-controller.runCtx.Done():
			break loop
		}

		egressRegion := controller.config.EgressRegion
		constraints := request.constraints

		var response serverEntriesReportResponse

		// Set of egress regions with at least one available candidate.
		regions := make(map[string]bool)

		// callback is invoked by ScanServerEntries for each server entry;
		// returning false cancels the scan.
		callback := func(serverEntry *protocol.ServerEntry) bool {

			// In establishment, excludeIntensive depends on what set of protocols are
			// already being dialed. For these reports, don't exclude intensive
			// protocols as any intensive candidate can always be an available
			// candidate at some point.
			excludeIntensive := false

			isInitialCandidate := constraints.isInitialCandidate(excludeIntensive, serverEntry)
			isCandidate := constraints.isCandidate(excludeIntensive, serverEntry)

			// initialCandidatesAnyEgressRegion counts initial candidates
			// regardless of the configured egress region.
			if isInitialCandidate {
				response.initialCandidatesAnyEgressRegion += 1
			}

			if egressRegion == "" || serverEntry.Region == egressRegion {
				if isInitialCandidate {
					response.initialCandidates += 1
				}
				if isCandidate {
					response.candidates += 1
				}
			}

			isAvailable := isCandidate
			if constraints.hasInitialProtocols() {
				// Available egress regions is subject to an initial limit constraint, if
				// present: see AvailableEgressRegions comment in launchEstablishing.
				isAvailable = isInitialCandidate
			}

			if isAvailable {
				// Ignore server entries with no region field.
				if serverEntry.Region != "" {
					regions[serverEntry.Region] = true
				}
			}

			select {
			case <-controller.runCtx.Done():
				// Don't block controller shutdown: cancel the scan.
				return false
			default:
				return true
			}
		}

		startTime := time.Now()

		response.err = ScanServerEntries(callback)

		// Report this duration in CandidateServers as an indication of datastore
		// performance.
		duration := time.Since(startTime)

		response.availableEgressRegions = make([]string, 0, len(regions))
		for region := range regions {
			response.availableEgressRegions = append(response.availableEgressRegions, region)
		}

		if response.err != nil {

			// For diagnostics, we'll post this even when cancelled due to shutdown.
			NoticeWarning("ScanServerEntries failed: %v", errors.Trace(response.err))

			// Continue and send error response. Clear any partial data to avoid
			// misuse.
			response.candidates = 0
			response.initialCandidates = 0
			response.initialCandidatesAnyEgressRegion = 0
			response.availableEgressRegions = []string{}
		}

		if request.awaitResponse != nil {
			select {
			case request.awaitResponse <- &response:
			case <-controller.runCtx.Done():
				// The receiver may be gone when shutting down.
			}
		}

		if response.err == nil {

			NoticeCandidateServers(
				controller.config.EgressRegion,
				controller.protocolSelectionConstraints,
				response.initialCandidates,
				response.candidates,
				duration)

			NoticeAvailableEgressRegions(
				response.availableEgressRegions)
		}
	}

	NoticeInfo("exiting server entries reporter")
}
   721  
   722  // signalServerEntriesReporter triggers a new server entry report. Set
   723  // request.awaitResponse to obtain the report output. When awaitResponse is
   724  // set, signalServerEntriesReporter blocks until the reporter receives the
   725  // request, guaranteeing the new report runs. Otherwise, the report is
   726  // considered to be informational and may or may not run, depending on whether
   727  // another run is already in progress.
   728  func (controller *Controller) signalServerEntriesReporter(request *serverEntriesReportRequest) {
   729  
   730  	if request.awaitResponse == nil {
   731  		select {
   732  		case controller.signalReportServerEntries <- request:
   733  		default:
   734  		}
   735  	} else {
   736  		controller.signalReportServerEntries <- request
   737  	}
   738  }
   739  
// connectedReporter sends periodic "connected" requests to the Psiphon API.
// These requests are for server-side unique user stats calculation. See the
// comment in DoConnectedRequest for a description of the request mechanism.
//
// To correctly count daily unique users, only one connected request is made
// across all simultaneous multi-tunnels; and the connected request is
// repeated every 24h.
//
// The signalReportConnected mechanism is used to trigger a connected request
// immediately after a reconnect. While strictly only one connected request
// per 24h is required in order to count daily unique users, the connected
// request also delivers the establishment duration metric (which includes
// time elapsed performing the handshake request) and additional fragmentation
// metrics; these metrics are measured for each tunnel.
func (controller *Controller) connectedReporter() {
	defer controller.runWaitGroup.Done()

	// session is nil when DisableApi is set
	if controller.config.DisableApi {
		return
	}

	// Block until the first tunnel is established: runTunnels signals
	// signalConnectedReporter on each first-tunnel establishment.
	select {
	case <-controller.signalReportConnected:
		// Make the initial connected request
	case <-controller.runCtx.Done():
		return
	}

loop:
	for {

		// Pick any active tunnel and make the next connected request. No error is
		// logged if there's no active tunnel, as that's not an unexpected
		// condition.
		reported := false
		tunnel := controller.getNextActiveTunnel()
		if tunnel != nil {
			err := tunnel.serverContext.DoConnectedRequest()
			if err == nil {
				reported = true
			} else {
				NoticeWarning("failed to make connected request: %v",
					errors.Trace(err))
			}
		}

		// Schedule the next connected request and wait. This duration is not a
		// dynamic ClientParameter as the daily unique user stats logic specifically
		// requires a "connected" request no more or less often than every 24h.
		//
		// When the request was not made (no active tunnel) or failed, retry
		// on the shorter PsiphonAPIConnectedRequestRetryPeriod instead.
		var duration time.Duration
		if reported {
			duration = 24 * time.Hour
		} else {
			duration = controller.config.GetParameters().Get().Duration(
				parameters.PsiphonAPIConnectedRequestRetryPeriod)
		}
		timer := time.NewTimer(duration)
		doBreak := false
		select {
		case <-controller.signalReportConnected:
		case <-timer.C:
			// Make another connected request
		case <-controller.runCtx.Done():
			doBreak = true
		}
		// Stop the timer in every case so its resources are released
		// promptly; exit via doBreak (not return) so the timer is stopped
		// and the exit notice below is emitted.
		timer.Stop()
		if doBreak {
			break loop
		}
	}

	NoticeInfo("exiting connected reporter")
}
   814  
   815  func (controller *Controller) signalConnectedReporter() {
   816  
   817  	// session is nil when DisableApi is set
   818  	if controller.config.DisableApi {
   819  		return
   820  	}
   821  
   822  	select {
   823  	case controller.signalReportConnected <- struct{}{}:
   824  	default:
   825  	}
   826  }
   827  
   828  // establishTunnelWatcher terminates the controller if a tunnel
   829  // has not been established in the configured time period. This
   830  // is regardless of how many tunnels are presently active -- meaning
   831  // that if an active tunnel was established and lost the controller
   832  // is left running (to re-establish).
   833  func (controller *Controller) establishTunnelWatcher() {
   834  	defer controller.runWaitGroup.Done()
   835  
   836  	timeout := controller.config.GetParameters().Get().Duration(
   837  		parameters.EstablishTunnelTimeout)
   838  
   839  	if timeout > 0 {
   840  		timer := time.NewTimer(timeout)
   841  		defer timer.Stop()
   842  
   843  		select {
   844  		case <-timer.C:
   845  			if !controller.hasEstablishedOnce() {
   846  				NoticeEstablishTunnelTimeout(timeout)
   847  				controller.SignalComponentFailure()
   848  			}
   849  		case <-controller.runCtx.Done():
   850  		}
   851  	}
   852  
   853  	NoticeInfo("exiting establish tunnel watcher")
   854  }
   855  
// runTunnels is the controller tunnel management main loop. It starts and stops
// establishing tunnels based on the target tunnel pool size and the current size
// of the pool. Tunnels are established asynchronously using worker goroutines.
//
// When there are no server entries for the target region/protocol, the
// establishCandidateGenerator will yield no candidates and wait before
// trying again. In the meantime, a remote server entry fetch may supply
// valid candidates.
//
// When a tunnel is established, it's added to the active pool. The tunnel's
// operateTunnel goroutine monitors the tunnel.
//
// When a tunnel fails, it's removed from the pool and the establish process is
// restarted to fill the pool.
func (controller *Controller) runTunnels() {
	defer controller.runWaitGroup.Done()

	// Start running

	controller.startEstablishing()
loop:
	for {
		select {

		case <-controller.signalRestartEstablishing:

			// signalRestartEstablishing restarts any establishment in progress. One
			// use case for this is to prioritize a newly imported, exchanged server
			// entry, which will be in the affinity position.
			//
			// It's possible for another connection to establish concurrent to signalling;
			// since the overall goal remains to establish _any_ connection, we accept that
			// in some cases the exchanged server entry may not get used.

			if controller.isEstablishing {
				controller.stopEstablishing()
				controller.startEstablishing()
			}

		case failedTunnel := <-controller.failedTunnels:
			NoticeWarning("tunnel failed: %s", failedTunnel.dialParams.ServerEntry.GetDiagnosticID())
			controller.terminateTunnel(failedTunnel)

			// Clear the reference to this tunnel before calling startEstablishing,
			// which will invoke a garbage collection.
			failedTunnel = nil

			// Concurrency note: only this goroutine may call startEstablishing/stopEstablishing,
			// which reference controller.isEstablishing.
			controller.startEstablishing()

		case connectedTunnel := <-controller.connectedTunnels:

			// Tunnel establishment has two phases: connection and activation.
			//
			// Connection is run concurrently by the establishTunnelWorkers, to minimize
			// delay when it's not yet known which server and protocol will be available
			// and unblocked.
			//
			// Activation is run serially, here, to minimize the overhead of making a
			// handshake request and starting the operateTunnel management worker for a
			// tunnel which may be discarded.
			//
			// When the active tunnel will complete establishment, establishment is
			// stopped before activation. This interrupts all connecting tunnels and
			// garbage collects their memory. The purpose is to minimize memory
			// pressure when the handshake request is made. In the unlikely case that the
			// handshake fails, establishment is restarted.
			//
			// Any delays in stopEstablishing will delay the handshake for the last
			// active tunnel.
			//
			// In the typical case of tunnelPoolSize of 1, only a single handshake is
			// performed and the homepages notices file, when used, will not be modified
			// after the NoticeTunnels(1) [i.e., connected] until NoticeTunnels(0) [i.e.,
			// disconnected]. For tunnelPoolSize > 1, serial handshakes only ensures that
			// each set of emitted NoticeHomepages is contiguous.

			active, outstanding := controller.numTunnels()

			// discardTunnel will be true here when already fully established.

			discardTunnel := (outstanding <= 0)
			isFirstTunnel := (active == 0)
			isLastTunnel := (outstanding == 1)

			if !discardTunnel {

				if isLastTunnel {
					controller.stopEstablishing()
				}

				err := connectedTunnel.Activate(controller.runCtx, controller)

				if err != nil {
					NoticeWarning("failed to activate %s: %v",
						connectedTunnel.dialParams.ServerEntry.GetDiagnosticID(),
						errors.Trace(err))
					discardTunnel = true
				} else {
					// It's unlikely that registerTunnel will fail, since only this goroutine
					// calls registerTunnel -- and after checking numTunnels; so failure is not
					// expected.
					//
					// NOTE(review): err is nil on this branch, so the %v argument
					// below logs no underlying error -- the failure reason is the
					// pool being full or a duplicate server; consider a distinct
					// message. Confirm before changing, as notices may be parsed.
					if !controller.registerTunnel(connectedTunnel) {
						NoticeWarning("failed to register %s: %v",
							connectedTunnel.dialParams.ServerEntry.GetDiagnosticID(),
							errors.Trace(err))
						discardTunnel = true
					}
				}

				// May need to replace this tunnel
				if isLastTunnel && discardTunnel {
					controller.startEstablishing()
				}

			}

			if discardTunnel {
				controller.discardTunnel(connectedTunnel)

				// Clear the reference to this discarded tunnel and immediately run
				// a garbage collection to reclaim its memory.
				connectedTunnel = nil
				DoGarbageCollection()

				// Skip the rest of this case
				break
			}

			atomic.AddInt32(&controller.establishedTunnelsCount, 1)

			NoticeActiveTunnel(
				connectedTunnel.dialParams.ServerEntry.GetDiagnosticID(),
				connectedTunnel.dialParams.TunnelProtocol,
				connectedTunnel.dialParams.ServerEntry.SupportsSSHAPIRequests())

			if isFirstTunnel {

				// Signal a connected request on each 1st tunnel establishment. For
				// multi-tunnels, the session is connected as long as at least one
				// tunnel is established.
				controller.signalConnectedReporter()

				// If the handshake indicated that a new client version is available,
				// trigger an upgrade download.
				// Note: serverContext is nil when DisableApi is set
				if connectedTunnel.serverContext != nil &&
					connectedTunnel.serverContext.clientUpgradeVersion != "" {

					handshakeVersion := connectedTunnel.serverContext.clientUpgradeVersion
					// Non-blocking send: a pending download signal is sufficient.
					select {
					case controller.signalDownloadUpgrade <- handshakeVersion:
					default:
					}
				}
			}

			// Set the new tunnel as the transport for the packet tunnel. The packet tunnel
			// client remains up when reestablishing, but no packets are relayed while there
			// is no connected tunnel. UseTunnel will establish a new packet tunnel SSH
			// channel over the new SSH tunnel and configure the packet tunnel client to use
			// the new SSH channel as its transport.
			//
			// Note: as is, this logic is suboptimal for tunnelPoolSize > 1, as this would
			// continuously initialize new packet tunnel sessions for each established
			// server. For now, config validation requires tunnelPoolSize == 1 when
			// the packet tunnel is used.

			if controller.packetTunnelTransport != nil {
				controller.packetTunnelTransport.UseTunnel(connectedTunnel)
			}

			if controller.isFullyEstablished() {
				controller.stopEstablishing()
			}

		case <-controller.runCtx.Done():
			break loop
		}
	}

	// Stop running

	controller.stopEstablishing()
	controller.terminateAllTunnels()

	// Drain tunnel channels. Closing is safe here: the senders are the
	// establish workers and operateTunnel goroutines stopped above.
	close(controller.connectedTunnels)
	for tunnel := range controller.connectedTunnels {
		controller.discardTunnel(tunnel)
	}
	close(controller.failedTunnels)
	for tunnel := range controller.failedTunnels {
		controller.discardTunnel(tunnel)
	}

	NoticeInfo("exiting run tunnels")
}
  1055  
  1056  // SignalSeededNewSLOK implements the TunnelOwner interface. This function
  1057  // is called by Tunnel.operateTunnel when the tunnel has received a new,
  1058  // previously unknown SLOK from the server. The Controller triggers an OSL
  1059  // fetch, as the new SLOK may be sufficient to access new OSLs.
  1060  func (controller *Controller) SignalSeededNewSLOK() {
  1061  	select {
  1062  	case controller.signalFetchObfuscatedServerLists <- struct{}{}:
  1063  	default:
  1064  	}
  1065  }
  1066  
  1067  // SignalTunnelFailure implements the TunnelOwner interface. This function
  1068  // is called by Tunnel.operateTunnel when the tunnel has detected that it
  1069  // has failed. The Controller will signal runTunnels to create a new
  1070  // tunnel and/or remove the tunnel from the list of active tunnels.
  1071  func (controller *Controller) SignalTunnelFailure(tunnel *Tunnel) {
  1072  	// Don't block. Assumes the receiver has a buffer large enough for
  1073  	// the typical number of operated tunnels. In case there's no room,
  1074  	// terminate the tunnel (runTunnels won't get a signal in this case,
  1075  	// but the tunnel will be removed from the list of active tunnels).
  1076  	select {
  1077  	case controller.failedTunnels <- tunnel:
  1078  	default:
  1079  		controller.terminateTunnel(tunnel)
  1080  	}
  1081  }
  1082  
  1083  // discardTunnel disposes of a successful connection that is no longer required.
  1084  func (controller *Controller) discardTunnel(tunnel *Tunnel) {
  1085  	NoticeInfo("discard tunnel: %s", tunnel.dialParams.ServerEntry.GetDiagnosticID())
  1086  	// TODO: not calling PromoteServerEntry, since that would rank the
  1087  	// discarded tunnel before fully active tunnels. Can a discarded tunnel
  1088  	// be promoted (since it connects), but with lower rank than all active
  1089  	// tunnels?
  1090  	tunnel.Close(true)
  1091  }
  1092  
  1093  // registerTunnel adds the connected tunnel to the pool of active tunnels
  1094  // which are candidates for port forwarding. Returns true if the pool has an
  1095  // empty slot and false if the pool is full (caller should discard the tunnel).
  1096  func (controller *Controller) registerTunnel(tunnel *Tunnel) bool {
  1097  	controller.tunnelMutex.Lock()
  1098  	defer controller.tunnelMutex.Unlock()
  1099  	if len(controller.tunnels) >= controller.tunnelPoolSize {
  1100  		return false
  1101  	}
  1102  	// Perform a final check just in case we've established
  1103  	// a duplicate connection.
  1104  	for _, activeTunnel := range controller.tunnels {
  1105  		if activeTunnel.dialParams.ServerEntry.IpAddress ==
  1106  			tunnel.dialParams.ServerEntry.IpAddress {
  1107  
  1108  			NoticeWarning("duplicate tunnel: %s", tunnel.dialParams.ServerEntry.GetDiagnosticID())
  1109  			return false
  1110  		}
  1111  	}
  1112  	controller.establishedOnce = true
  1113  	controller.tunnels = append(controller.tunnels, tunnel)
  1114  	NoticeTunnels(len(controller.tunnels))
  1115  
  1116  	// Promote this successful tunnel to first rank so it's one
  1117  	// of the first candidates next time establish runs.
  1118  	// Connecting to a TargetServerEntry does not change the
  1119  	// ranking.
  1120  	if controller.config.TargetServerEntry == "" {
  1121  		PromoteServerEntry(controller.config, tunnel.dialParams.ServerEntry.IpAddress)
  1122  	}
  1123  
  1124  	return true
  1125  }
  1126  
  1127  // hasEstablishedOnce indicates if at least one active tunnel has
  1128  // been established up to this point. This is regardeless of how many
  1129  // tunnels are presently active.
  1130  func (controller *Controller) hasEstablishedOnce() bool {
  1131  	controller.tunnelMutex.Lock()
  1132  	defer controller.tunnelMutex.Unlock()
  1133  	return controller.establishedOnce
  1134  }
  1135  
  1136  // isFullyEstablished indicates if the pool of active tunnels is full.
  1137  func (controller *Controller) isFullyEstablished() bool {
  1138  	controller.tunnelMutex.Lock()
  1139  	defer controller.tunnelMutex.Unlock()
  1140  	return len(controller.tunnels) >= controller.tunnelPoolSize
  1141  }
  1142  
  1143  // numTunnels returns the number of active and outstanding tunnels.
  1144  // Oustanding is the number of tunnels required to fill the pool of
  1145  // active tunnels.
  1146  func (controller *Controller) numTunnels() (int, int) {
  1147  	controller.tunnelMutex.Lock()
  1148  	defer controller.tunnelMutex.Unlock()
  1149  	active := len(controller.tunnels)
  1150  	outstanding := controller.tunnelPoolSize - len(controller.tunnels)
  1151  	return active, outstanding
  1152  }
  1153  
  1154  // terminateTunnel removes a tunnel from the pool of active tunnels
  1155  // and closes the tunnel. The next-tunnel state used by getNextActiveTunnel
  1156  // is adjusted as required.
  1157  func (controller *Controller) terminateTunnel(tunnel *Tunnel) {
  1158  	controller.tunnelMutex.Lock()
  1159  	defer controller.tunnelMutex.Unlock()
  1160  	for index, activeTunnel := range controller.tunnels {
  1161  		if tunnel == activeTunnel {
  1162  			controller.tunnels = append(
  1163  				controller.tunnels[:index], controller.tunnels[index+1:]...)
  1164  			if controller.nextTunnel > index {
  1165  				controller.nextTunnel--
  1166  			}
  1167  			if controller.nextTunnel >= len(controller.tunnels) {
  1168  				controller.nextTunnel = 0
  1169  			}
  1170  			activeTunnel.Close(false)
  1171  			NoticeTunnels(len(controller.tunnels))
  1172  			break
  1173  		}
  1174  	}
  1175  }
  1176  
  1177  // terminateAllTunnels empties the tunnel pool, closing all active tunnels.
  1178  // This is used when shutting down the controller.
  1179  func (controller *Controller) terminateAllTunnels() {
  1180  	controller.tunnelMutex.Lock()
  1181  	defer controller.tunnelMutex.Unlock()
  1182  	// Closing all tunnels in parallel. In an orderly shutdown, each tunnel
  1183  	// may take a few seconds to send a final status request. We only want
  1184  	// to wait as long as the single slowest tunnel.
  1185  	closeWaitGroup := new(sync.WaitGroup)
  1186  	closeWaitGroup.Add(len(controller.tunnels))
  1187  	for _, activeTunnel := range controller.tunnels {
  1188  		tunnel := activeTunnel
  1189  		go func() {
  1190  			defer closeWaitGroup.Done()
  1191  			tunnel.Close(false)
  1192  		}()
  1193  	}
  1194  	closeWaitGroup.Wait()
  1195  	controller.tunnels = make([]*Tunnel, 0)
  1196  	controller.nextTunnel = 0
  1197  	NoticeTunnels(len(controller.tunnels))
  1198  }
  1199  
  1200  // getNextActiveTunnel returns the next tunnel from the pool of active
  1201  // tunnels. Currently, tunnel selection order is simple round-robin.
  1202  func (controller *Controller) getNextActiveTunnel() (tunnel *Tunnel) {
  1203  	controller.tunnelMutex.Lock()
  1204  	defer controller.tunnelMutex.Unlock()
  1205  	if len(controller.tunnels) == 0 {
  1206  		return nil
  1207  	}
  1208  	tunnel = controller.tunnels[controller.nextTunnel]
  1209  	controller.nextTunnel =
  1210  		(controller.nextTunnel + 1) % len(controller.tunnels)
  1211  	return tunnel
  1212  }
  1213  
  1214  // isActiveTunnelServerEntry is used to check if there's already
  1215  // an existing tunnel to a candidate server.
  1216  func (controller *Controller) isActiveTunnelServerEntry(
  1217  	serverEntry *protocol.ServerEntry) bool {
  1218  
  1219  	controller.tunnelMutex.Lock()
  1220  	defer controller.tunnelMutex.Unlock()
  1221  	for _, activeTunnel := range controller.tunnels {
  1222  		if activeTunnel.dialParams.ServerEntry.IpAddress == serverEntry.IpAddress {
  1223  			return true
  1224  		}
  1225  	}
  1226  	return false
  1227  }
  1228  
  1229  func (controller *Controller) setTunnelPoolSize(tunnelPoolSize int) {
  1230  	controller.tunnelMutex.Lock()
  1231  	defer controller.tunnelMutex.Unlock()
  1232  	if tunnelPoolSize < 1 {
  1233  		tunnelPoolSize = 1
  1234  	}
  1235  	if tunnelPoolSize > MAX_TUNNEL_POOL_SIZE {
  1236  		tunnelPoolSize = MAX_TUNNEL_POOL_SIZE
  1237  	}
  1238  	controller.tunnelPoolSize = tunnelPoolSize
  1239  }
  1240  
  1241  func (controller *Controller) getTunnelPoolSize() int {
  1242  	controller.tunnelMutex.Lock()
  1243  	defer controller.tunnelMutex.Unlock()
  1244  	return controller.tunnelPoolSize
  1245  }
  1246  
// Dial selects an active tunnel and establishes a port forward
// connection through the selected tunnel. Failure to connect is considered
// a port forward failure, for the purpose of monitoring tunnel health.
//
// When split tunnel mode is enabled, the connection may be untunneled,
// depending on GeoIP classification of the destination.
//
// downstreamConn is an optional parameter which specifies a connection to be
// explicitly closed when the dialed connection is closed. For instance, this
// is used to close downstreamConn App<->LocalProxy connections when the
// related LocalProxy<->SshPortForward connections close.
func (controller *Controller) Dial(
	remoteAddr string, downstreamConn net.Conn) (conn net.Conn, err error) {

	// Round-robin tunnel selection; fails when no tunnel is active.
	tunnel := controller.getNextActiveTunnel()
	if tunnel == nil {
		return nil, errors.TraceNew("no active tunnels")
	}

	if !tunnel.config.IsSplitTunnelEnabled() {

		tunneledConn, splitTunnel, err := tunnel.DialTCPChannel(
			remoteAddr, false, downstreamConn)
		if err != nil {
			return nil, errors.Trace(err)
		}

		// A split tunnel classification is not expected when split tunnel
		// mode is disabled.
		if splitTunnel {
			return nil, errors.TraceNew(
				"unexpected split tunnel classification")
		}

		return tunneledConn, nil
	}

	// In split tunnel mode, TCP port forwards to destinations in the same
	// country as the client are untunneled.
	//
	// Split tunnel is implemented with assistance from the server to classify
	// destinations as being in the same country as the client. The server knows
	// the client's public IP GeoIP data, and, for clients with split tunnel mode
	// enabled, the server resolves the port forward destination address and
	// checks the destination IP GeoIP data.
	//
	// When the countries match, the server "rejects" the port forward with a
	// distinct response that indicates to the client that an untunneled port
	// forward should be established locally.
	//
	// The client maintains a classification cache that allows it to make
	// untunneled port forwards without requiring a round trip to the server.
	// Only destinations classified as untunneled are stored in the cache: a
	// destination classified as tunneled requires the same round trip as an
	// unknown destination.
	//
	// When the countries do not match, the server establishes a port forward, as
	// it does for all port forwards in non-split tunnel mode. There is no
	// additional round trip for tunneled port forwards.

	splitTunnelHost, _, err := net.SplitHostPort(remoteAddr)
	if err != nil {
		return nil, errors.Trace(err)
	}

	untunneledCache := controller.untunneledSplitTunnelClassifications

	// If the destination hostname is in the untunneled split tunnel
	// classifications cache, skip the round trip to the server and do the
	// direct, untunneled dial immediately.
	_, cachedUntunneled := untunneledCache.Get(splitTunnelHost)

	if !cachedUntunneled {

		tunneledConn, splitTunnel, err := tunnel.DialTCPChannel(
			remoteAddr, false, downstreamConn)
		if err != nil {
			return nil, errors.Trace(err)
		}

		if !splitTunnel {

			// Clear any cached untunneled classification entry for this destination
			// hostname, as the server is now classifying it as tunneled.
			untunneledCache.Delete(splitTunnelHost)

			return tunneledConn, nil
		}

		// The server has indicated that the client should make a direct,
		// untunneled dial. Cache the classification to avoid this round trip in
		// the immediate future.
		untunneledCache.Add(splitTunnelHost, true, lrucache.DefaultExpiration)
	}

	NoticeUntunneled(splitTunnelHost)

	untunneledConn, err := controller.DirectDial(remoteAddr)
	if err != nil {
		return nil, errors.Trace(err)
	}

	return untunneledConn, nil
}
  1349  
  1350  // DirectDial dials an untunneled TCP connection within the controller run context.
  1351  func (controller *Controller) DirectDial(remoteAddr string) (conn net.Conn, err error) {
  1352  	return DialTCP(controller.runCtx, remoteAddr, controller.untunneledDialConfig)
  1353  }
  1354  
  1355  // triggerFetches signals RSL, OSL, and upgrade download fetchers to begin, if
  1356  // not already running. triggerFetches is called when tunnel establishment
  1357  // fails to complete within a deadline and in other cases where local
  1358  // circumvention capabilities are lacking and we may require new server
  1359  // entries or client versions with new capabilities.
  1360  func (controller *Controller) triggerFetches() {
  1361  
  1362  	// Trigger a common remote server list fetch, since we may have failed
  1363  	// to connect with all known servers. Don't block sending signal, since
  1364  	// this signal may have already been sent.
  1365  	// Don't wait for fetch remote to succeed, since it may fail and
  1366  	// enter a retry loop and we're better off trying more known servers.
  1367  	// TODO: synchronize the fetch response, so it can be incorporated
  1368  	// into the server entry iterator as soon as available.
  1369  	select {
  1370  	case controller.signalFetchCommonRemoteServerList <- struct{}{}:
  1371  	default:
  1372  	}
  1373  
  1374  	// Trigger an OSL fetch in parallel. Both fetches are run in parallel
  1375  	// so that if one out of the common RLS and OSL set is large, it doesn't
  1376  	// doesn't entirely block fetching the other.
  1377  	select {
  1378  	case controller.signalFetchObfuscatedServerLists <- struct{}{}:
  1379  	default:
  1380  	}
  1381  
  1382  	// Trigger an out-of-band upgrade availability check and download.
  1383  	// Since we may have failed to connect, we may benefit from upgrading
  1384  	// to a new client version with new circumvention capabilities.
  1385  	select {
  1386  	case controller.signalDownloadUpgrade <- "":
  1387  	default:
  1388  	}
  1389  }
  1390  
// protocolSelectionConstraints specifies the constraints applied when
// selecting a tunnel protocol for a candidate server entry.
//
// initialLimitTunnelProtocols, when non-empty, limits protocol selection
// for the first initialLimitTunnelProtocolsCandidateCount dialed candidates;
// after that, limitTunnelProtocols applies (see supportedProtocols).
// replayCandidateCount limits how many dialed candidates are eligible for
// replay; -1 means no limit (see canReplay).
type protocolSelectionConstraints struct {
	useUpstreamProxy                          bool
	initialLimitTunnelProtocols               protocol.TunnelProtocols
	initialLimitTunnelProtocolsCandidateCount int
	limitTunnelProtocols                      protocol.TunnelProtocols
	limitTunnelDialPortNumbers                protocol.TunnelProtocolPortLists
	limitQUICVersions                         protocol.QUICVersions
	replayCandidateCount                      int
}
  1400  
  1401  func (p *protocolSelectionConstraints) hasInitialProtocols() bool {
  1402  	return len(p.initialLimitTunnelProtocols) > 0 && p.initialLimitTunnelProtocolsCandidateCount > 0
  1403  }
  1404  
  1405  func (p *protocolSelectionConstraints) isInitialCandidate(
  1406  	excludeIntensive bool,
  1407  	serverEntry *protocol.ServerEntry) bool {
  1408  
  1409  	return p.hasInitialProtocols() &&
  1410  		len(serverEntry.GetSupportedProtocols(
  1411  			conditionallyEnabledComponents{},
  1412  			p.useUpstreamProxy,
  1413  			p.initialLimitTunnelProtocols,
  1414  			p.limitTunnelDialPortNumbers,
  1415  			p.limitQUICVersions,
  1416  			excludeIntensive)) > 0
  1417  }
  1418  
  1419  func (p *protocolSelectionConstraints) isCandidate(
  1420  	excludeIntensive bool,
  1421  	serverEntry *protocol.ServerEntry) bool {
  1422  
  1423  	return len(serverEntry.GetSupportedProtocols(
  1424  		conditionallyEnabledComponents{},
  1425  		p.useUpstreamProxy,
  1426  		p.limitTunnelProtocols,
  1427  		p.limitTunnelDialPortNumbers,
  1428  		p.limitQUICVersions,
  1429  		excludeIntensive)) > 0
  1430  }
  1431  
  1432  func (p *protocolSelectionConstraints) canReplay(
  1433  	connectTunnelCount int,
  1434  	excludeIntensive bool,
  1435  	serverEntry *protocol.ServerEntry,
  1436  	replayProtocol string) bool {
  1437  
  1438  	if p.replayCandidateCount != -1 && connectTunnelCount > p.replayCandidateCount {
  1439  		return false
  1440  	}
  1441  
  1442  	return common.Contains(
  1443  		p.supportedProtocols(connectTunnelCount, excludeIntensive, serverEntry),
  1444  		replayProtocol)
  1445  }
  1446  
  1447  func (p *protocolSelectionConstraints) supportedProtocols(
  1448  	connectTunnelCount int,
  1449  	excludeIntensive bool,
  1450  	serverEntry *protocol.ServerEntry) []string {
  1451  
  1452  	limitTunnelProtocols := p.limitTunnelProtocols
  1453  
  1454  	if len(p.initialLimitTunnelProtocols) > 0 &&
  1455  		p.initialLimitTunnelProtocolsCandidateCount > connectTunnelCount {
  1456  
  1457  		limitTunnelProtocols = p.initialLimitTunnelProtocols
  1458  	}
  1459  
  1460  	return serverEntry.GetSupportedProtocols(
  1461  		conditionallyEnabledComponents{},
  1462  		p.useUpstreamProxy,
  1463  		limitTunnelProtocols,
  1464  		p.limitTunnelDialPortNumbers,
  1465  		p.limitQUICVersions,
  1466  		excludeIntensive)
  1467  }
  1468  
  1469  func (p *protocolSelectionConstraints) selectProtocol(
  1470  	connectTunnelCount int,
  1471  	excludeIntensive bool,
  1472  	serverEntry *protocol.ServerEntry) (string, bool) {
  1473  
  1474  	candidateProtocols := p.supportedProtocols(connectTunnelCount, excludeIntensive, serverEntry)
  1475  
  1476  	if len(candidateProtocols) == 0 {
  1477  		return "", false
  1478  	}
  1479  
  1480  	// Pick at random from the supported protocols. This ensures that we'll
  1481  	// eventually try all possible protocols. Depending on network
  1482  	// configuration, it may be the case that some protocol is only available
  1483  	// through multi-capability servers, and a simpler ranked preference of
  1484  	// protocols could lead to that protocol never being selected.
  1485  
  1486  	index := prng.Intn(len(candidateProtocols))
  1487  
  1488  	return candidateProtocols[index], true
  1489  
  1490  }
  1491  
// candidateServerEntry bundles a server entry with the establishment state
// used when the establish workers dial it.
type candidateServerEntry struct {
	// serverEntry is the candidate server to dial.
	serverEntry                *protocol.ServerEntry
	// isServerAffinityCandidate is true for the server affinity candidate:
	// the very first server, as determined by datastore.PromoteServerEntry
	// (see the server affinity comment in startEstablishing).
	isServerAffinityCandidate  bool
	// adjustedEstablishStartTime is the establishment start time used when
	// reporting establishment duration; presumably adjusted to exclude
	// induced delays -- TODO confirm against the candidate generator.
	adjustedEstablishStartTime time.Time
}
  1497  
// startEstablishing creates a pool of worker goroutines which will
// attempt to establish tunnels to candidate servers. The candidates
// are generated by another goroutine.
//
// startEstablishing is a no-op when establishment is already in progress.
func (controller *Controller) startEstablishing() {
	if controller.isEstablishing {
		return
	}
	NoticeInfo("start establishing")

	// establishStartTime is used to calculate and report the client's tunnel
	// establishment duration. Establishment duration should include all
	// initialization in launchEstablishing and establishCandidateGenerator,
	// including any potentially long-running datastore iterations.
	establishStartTime := time.Now()

	// Reset all worker/candidate counters for this establishment run.
	controller.concurrentEstablishTunnelsMutex.Lock()
	controller.establishConnectTunnelCount = 0
	controller.concurrentEstablishTunnels = 0
	controller.concurrentIntensiveEstablishTunnels = 0
	controller.peakConcurrentEstablishTunnels = 0
	controller.peakConcurrentIntensiveEstablishTunnels = 0
	controller.concurrentEstablishTunnelsMutex.Unlock()

	// Reclaim memory before the resource-intensive establishment phase.
	DoGarbageCollection()
	emitMemoryMetrics()

	// The establish context cancelFunc, controller.stopEstablish, is called in
	// controller.stopEstablishing.

	controller.isEstablishing = true
	controller.establishStartTime = establishStartTime
	controller.establishCtx, controller.stopEstablish = context.WithCancel(controller.runCtx)
	controller.establishWaitGroup = new(sync.WaitGroup)
	controller.candidateServerEntries = make(chan *candidateServerEntry)

	// The server affinity mechanism attempts to favor the previously
	// used server when reconnecting. This is beneficial for user
	// applications which expect consistency in user IP address (for
	// example, a web site which prompts for additional user
	// authentication when the IP address changes).
	//
	// Only the very first server, as determined by
	// datastore.PromoteServerEntry(), is the server affinity candidate.
	// Concurrent connections attempts to many servers are launched
	// without delay, in case the affinity server connection fails.
	// While the affinity server connection is outstanding, when any
	// other connection is established, there is a short grace period
	// delay before delivering the established tunnel; this allows some
	// time for the affinity server connection to succeed first.
	// When the affinity server connection fails, any other established
	// tunnel is registered without delay.
	//
	// Note: the establishTunnelWorker that receives the affinity
	// candidate is solely responsible for closing
	// controller.serverAffinityDoneBroadcast.
	controller.serverAffinityDoneBroadcast = make(chan struct{})

	controller.establishWaitGroup.Add(1)
	go controller.launchEstablishing()
}
  1558  
// launchEstablishing performs per-establishment setup -- applying tactics,
// fixing protocol selection constraints and pool sizes, and reporting
// candidate server counts -- and then launches the establish worker pool
// and the candidate generator. It runs in its own goroutine, started by
// startEstablishing.
func (controller *Controller) launchEstablishing() {

	defer controller.establishWaitGroup.Done()

	// Before starting the establish tunnel workers, get and apply
	// tactics, launching a tactics request if required.
	//
	// Wait only TacticsWaitPeriod for the tactics request to complete (or
	// fail) before proceeding with tunnel establishment, in case the tactics
	// request is blocked or takes very long to complete.
	//
	// An in-flight tactics request uses meek in round tripper mode, which
	// uses less resources than meek tunnel relay mode. For this reason, the
	// tactics request is not counted in concurrentIntensiveEstablishTunnels.
	//
	// TODO: HTTP/2 uses significantly more memory, so perhaps
	// concurrentIntensiveEstablishTunnels should be counted in that case.
	//
	// Any in-flight tactics request or pending retry will be
	// canceled when establishment is stopped.

	if !controller.config.DisableTactics {

		timeout := controller.config.GetParameters().Get().Duration(
			parameters.TacticsWaitPeriod)

		tacticsDone := make(chan struct{})
		tacticsWaitPeriod := time.NewTimer(timeout)
		defer tacticsWaitPeriod.Stop()

		controller.establishWaitGroup.Add(1)
		go func() {
			defer controller.establishWaitGroup.Done()
			defer close(tacticsDone)
			GetTactics(controller.establishCtx, controller.config)
		}()

		select {
		case <-tacticsDone:
		case <-tacticsWaitPeriod.C:
		}

		tacticsWaitPeriod.Stop()

		if controller.isStopEstablishing() {
			// This check isn't strictly required but avoids the overhead of launching
			// workers if establishment stopped while awaiting a tactics request.
			return
		}
	}

	// Initial- and LimitTunnelProtocols may be set by tactics.
	//
	// These protocol limits are fixed once per establishment, for
	// consistent application of related probabilities (applied by
	// ParametersAccessor.TunnelProtocols). The
	// establishLimitTunnelProtocolsState field must be read-only after this
	// point, allowing concurrent reads by establishment workers.

	p := controller.config.GetParameters().Get()

	controller.protocolSelectionConstraints = &protocolSelectionConstraints{
		useUpstreamProxy:                          controller.config.UseUpstreamProxy(),
		initialLimitTunnelProtocols:               p.TunnelProtocols(parameters.InitialLimitTunnelProtocols),
		initialLimitTunnelProtocolsCandidateCount: p.Int(parameters.InitialLimitTunnelProtocolsCandidateCount),
		limitTunnelProtocols:                      p.TunnelProtocols(parameters.LimitTunnelProtocols),

		limitTunnelDialPortNumbers: protocol.TunnelProtocolPortLists(
			p.TunnelProtocolPortLists(parameters.LimitTunnelDialPortNumbers)),

		replayCandidateCount: p.Int(parameters.ReplayCandidateCount),
	}

	// ConnectionWorkerPoolSize may be set by tactics.

	workerPoolSize := p.Int(parameters.ConnectionWorkerPoolSize)

	// When TargetServerEntry is used, override any worker pool size config or
	// tactic parameter and use a pool size of 1. The typical use case for
	// TargetServerEntry is to test a specific server with a single connection
	// attempt. Furthermore, too many concurrent attempts to connect to the
	// same server will trigger rate limiting.
	if controller.config.TargetServerEntry != "" {
		workerPoolSize = 1
	}

	// TunnelPoolSize may be set by tactics, subject to local constraints. A pool
	// size of one is forced in packet tunnel mode or when using a
	// TargetServerEntry. The tunnel pool size is reduced when there are
	// insufficient known server entries, within the set region and protocol
	// constraints, to satisfy the target.
	//
	// Limitations, to simplify concurrent access to shared state: a ceiling of
	// MAX_TUNNEL_POOL_SIZE is enforced by setTunnelPoolSize; the tunnel pool
	// size target is not re-adjusted after an API handshake, even though the
	// handshake response may deliver new tactics, or prune server entries which
	// were potential candidates; nor is the target re-adjusted after fetching
	// new server entries during this establishment.

	tunnelPoolSize := p.Int(parameters.TunnelPoolSize)
	if controller.config.PacketTunnelTunFileDescriptor > 0 ||
		controller.config.TargetServerEntry != "" {
		tunnelPoolSize = 1
	}

	p.Close()

	// Trigger CandidateServers and AvailableEgressRegions notices. By default,
	// this is an asynchronous operation, as the underlying full server entry
	// list enumeration may be a slow operation. In certain cases, where
	// candidate counts are required up front, await the result before
	// proceeding.

	awaitResponse := tunnelPoolSize > 1 ||
		controller.protocolSelectionConstraints.initialLimitTunnelProtocolsCandidateCount > 0

	// AvailableEgressRegions: after a fresh install, the outer client may not
	// have a list of regions to display; and LimitTunnelProtocols may reduce the
	// number of available regions.
	//
	// When the outer client receives NoticeAvailableEgressRegions and the
	// configured EgressRegion is not included in the region list, the outer
	// client _should_ stop tunnel-core and prompt the user to change the region
	// selection, as there are insufficient servers/capabilities to establish a
	// tunnel in the selected region.
	//
	// This report is delayed until after tactics are likely to be applied,
	// above; this avoids a ReportAvailableRegions reporting too many regions,
	// followed shortly by a ReportAvailableRegions reporting fewer regions. That
	// sequence could cause issues in the outer client UI.
	//
	// The reported regions are limited by protocolSelectionConstraints; in the
	// case where an initial limit is in place, only regions available for the
	// initial limit are reported. The initial phase will not complete if
	// EgressRegion is set such that there are no server entries with the
	// necessary protocol capabilities (either locally or from a remote server
	// list fetch).

	// Concurrency note: controller.protocolSelectionConstraints may be
	// overwritten before serverEntriesReporter reads it, and so cannot be
	// accessed directly by serverEntriesReporter.
	reportRequest := &serverEntriesReportRequest{
		constraints: controller.protocolSelectionConstraints,
	}

	if awaitResponse {
		// Buffer size of 1 ensures the sender, serverEntryReporter, won't block on
		// sending the response in the case where launchEstablishing exits due to
		// stopping establishment.
		reportRequest.awaitResponse = make(chan *serverEntriesReportResponse, 1)
	}

	controller.signalServerEntriesReporter(reportRequest)

	if awaitResponse {

		var reportResponse *serverEntriesReportResponse
		select {
		case reportResponse = <-reportRequest.awaitResponse:
		case <-controller.establishCtx.Done():
			// The sender may be gone when shutting down, or may not send until after
			// stopping establishment.
			return
		}
		if reportResponse.err != nil {
			NoticeError("failed to report server entries: %v",
				errors.Trace(reportResponse.err))
			controller.SignalComponentFailure()
			return
		}

		// Make adjustments based on candidate counts.

		if tunnelPoolSize > 1 {
			// Initial candidate count is ignored as count candidates will eventually
			// become available.
			if reportResponse.candidates < tunnelPoolSize {
				tunnelPoolSize = reportResponse.candidates
			}
			if tunnelPoolSize < 1 {
				tunnelPoolSize = 1
			}
		}
		controller.setTunnelPoolSize(tunnelPoolSize)

		// If InitialLimitTunnelProtocols is configured but cannot be satisfied,
		// skip the initial phase in this establishment. This avoids spinning,
		// unable to connect, in this case. InitialLimitTunnelProtocols is
		// intended to prioritize certain protocols, but not strictly select them.
		//
		// The candidate count check ignores egress region selection. When an egress
		// region is selected, it's the responsibility of the outer client to react
		// to the following ReportAvailableRegions output and clear the user's
		// selected region to prevent spinning, unable to connect. The initial phase
		// is skipped only when InitialLimitTunnelProtocols cannot be satisfied
		// _regardless_ of region selection.
		//
		// We presume that, in practise, most clients will have embedded server
		// entries with capabilities for most protocols; and that clients will
		// often perform RSL checks. So clients should most often have the
		// necessary capabilities to satisfy InitialLimitTunnelProtocols. When
		// this check fails, RSL/OSL/upgrade checks are triggered in order to gain
		// new capabilities.
		//
		// LimitTunnelProtocols remains a hard limit, as using prohibited
		// protocols may have some bad effect, such as a firewall blocking all
		// traffic from a host.

		if controller.protocolSelectionConstraints.initialLimitTunnelProtocolsCandidateCount > 0 {

			if reportResponse.initialCandidatesAnyEgressRegion == 0 {
				NoticeWarning("skipping initial limit tunnel protocols")
				controller.protocolSelectionConstraints.initialLimitTunnelProtocolsCandidateCount = 0

				// Since we were unable to satisfy the InitialLimitTunnelProtocols
				// tactic, trigger RSL, OSL, and upgrade fetches to potentially
				// gain new capabilities.
				controller.triggerFetches()
			}
		}
	}

	// Launch the worker pool and the candidate generator; these goroutines
	// run until establishment is stopped via controller.stopEstablish.

	for i := 0; i < workerPoolSize; i++ {
		controller.establishWaitGroup.Add(1)
		go controller.establishTunnelWorker()
	}

	controller.establishWaitGroup.Add(1)
	go controller.establishCandidateGenerator()
}
  1789  
// stopEstablishing signals the establish goroutines to stop and waits
// for the group to halt.
//
// stopEstablishing is a no-op when establishment is not in progress. After
// stopping, all establishment state is reset so a subsequent
// startEstablishing begins cleanly, and peak-concurrency and
// datastore/DNS metrics for the completed establishment are emitted.
func (controller *Controller) stopEstablishing() {
	if !controller.isEstablishing {
		return
	}
	NoticeInfo("stop establishing")
	controller.stopEstablish()
	// Note: establishCandidateGenerator closes controller.candidateServerEntries
	// (as it may be sending to that channel).
	controller.establishWaitGroup.Wait()
	NoticeInfo("stopped establishing")

	// Reset establishment state; safe now that all establish goroutines
	// have exited.
	controller.isEstablishing = false
	controller.establishStartTime = time.Time{}
	controller.establishCtx = nil
	controller.stopEstablish = nil
	controller.establishWaitGroup = nil
	controller.candidateServerEntries = nil
	controller.serverAffinityDoneBroadcast = nil

	// Capture peak concurrency counts for reporting, then zero the counters
	// for the next establishment run.
	controller.concurrentEstablishTunnelsMutex.Lock()
	peakConcurrent := controller.peakConcurrentEstablishTunnels
	peakConcurrentIntensive := controller.peakConcurrentIntensiveEstablishTunnels
	controller.establishConnectTunnelCount = 0
	controller.concurrentEstablishTunnels = 0
	controller.concurrentIntensiveEstablishTunnels = 0
	controller.peakConcurrentEstablishTunnels = 0
	controller.peakConcurrentIntensiveEstablishTunnels = 0
	controller.concurrentEstablishTunnelsMutex.Unlock()
	NoticeInfo("peak concurrent establish tunnels: %d", peakConcurrent)
	NoticeInfo("peak concurrent resource intensive establish tunnels: %d", peakConcurrentIntensive)

	emitMemoryMetrics()
	DoGarbageCollection()

	// Record datastore metrics after establishment, the phase which generates
	// the bulk of all datastore transactions: iterating over server entries,
	// storing new server entries, etc.
	emitDatastoreMetrics()

	// Similarly, establishment generates the bulk of domain resolves.
	emitDNSMetrics(controller.resolver)
}
  1834  
// establishCandidateGenerator populates the candidate queue with server entries
// from the data store. Server entries are iterated in rank order, so that promoted
// servers with higher rank are priority candidates.
//
// establishCandidateGenerator runs in its own goroutine until establishment
// is stopped, and is responsible for closing
// controller.candidateServerEntries on exit.
func (controller *Controller) establishCandidateGenerator() {
	defer controller.establishWaitGroup.Done()
	defer close(controller.candidateServerEntries)

	// totalNetworkWaitDuration is the elapsed time spent waiting
	// for network connectivity. This duration will be excluded
	// from reported tunnel establishment duration.
	var totalNetworkWaitDuration time.Duration

	applyServerAffinity, iterator, err := NewServerEntryIterator(controller.config)
	if err != nil {
		NoticeError("failed to iterate over candidates: %v", errors.Trace(err))
		controller.SignalComponentFailure()
		return
	}
	defer iterator.Close()

	// TODO: reconcile server affinity scheme with multi-tunnel mode
	if controller.getTunnelPoolSize() > 1 {
		applyServerAffinity = false
	}

	isServerAffinityCandidate := true
	if !applyServerAffinity {
		// With no affinity candidate, unblock all establish workers
		// immediately.
		isServerAffinityCandidate = false
		close(controller.serverAffinityDoneBroadcast)
	}

loop:
	// Repeat until stopped
	for {

		// A "round" consists of a new shuffle of the server entries and attempted
		// connections up to the end of the server entry iterator, or
		// parameters.EstablishTunnelWorkTime elapsed. Time spent waiting for
		// network connectivity is excluded from round elapsed time.
		//
		// After a round, if parameters.EstablishTunnelWorkTime has elapsed in total
		// with no tunnel established, remote server list and upgrade checks are
		// triggered.
		//
		// A complete server entry iteration does not trigger fetches since it's
		// possible to have fewer than parameters.ConnectionWorkerPoolSize
		// candidates, in which case rounds end instantly due to the complete server
		// entry iteration. An exception is made for an empty server entry iterator;
		// in that case fetches may be triggered immediately.
		//
		// The number of server candidates may change during this loop, due to
		// remote server list fetches. Due to the performance impact, we will not
		// trigger additional, informational CandidateServer notices while in the
		// establishing loop. Clients typically re-establish often enough that we
		// will see the effect of the remote server list fetch in diagnostics.

		roundStartTime := time.Now()
		var roundNetworkWaitDuration time.Duration

		workTime := controller.config.GetParameters().Get().Duration(
			parameters.EstablishTunnelWorkTime)

		candidateServerEntryCount := 0

		// Send each iterator server entry to the establish workers
		for {

			// Block while there is no network connectivity; this wait time is
			// excluded from both round and total establishment durations.
			networkWaitStartTime := time.Now()
			if !WaitForNetworkConnectivity(
				controller.establishCtx,
				controller.config.NetworkConnectivityChecker) {
				break loop
			}
			networkWaitDuration := time.Since(networkWaitStartTime)
			roundNetworkWaitDuration += networkWaitDuration
			totalNetworkWaitDuration += networkWaitDuration

			serverEntry, err := iterator.Next()
			if err != nil {
				NoticeError("failed to get next candidate: %v", errors.Trace(err))
				controller.SignalComponentFailure()
				break loop
			}
			if serverEntry == nil {
				// Completed this iteration
				NoticeInfo("completed server entry iteration")
				break
			}

			// Skip servers that can't handle the configured API protocol.
			if controller.config.TargetApiProtocol == protocol.PSIPHON_SSH_API_PROTOCOL &&
				!serverEntry.SupportsSSHAPIRequests() {
				continue
			}

			candidateServerEntryCount += 1

			// adjustedEstablishStartTime is establishStartTime shifted
			// to exclude time spent waiting for network connectivity.
			adjustedEstablishStartTime := controller.establishStartTime.Add(
				totalNetworkWaitDuration)

			candidate := &candidateServerEntry{
				serverEntry:                serverEntry,
				isServerAffinityCandidate:  isServerAffinityCandidate,
				adjustedEstablishStartTime: adjustedEstablishStartTime,
			}

			wasServerAffinityCandidate := isServerAffinityCandidate

			// Note: there must be only one server affinity candidate, as it
			// closes the serverAffinityDoneBroadcast channel.
			isServerAffinityCandidate = false

			// TODO: here we could generate multiple candidates from the
			// server entry when there are many MeekFrontingAddresses.

			select {
			case controller.candidateServerEntries <- candidate:
			case <-controller.establishCtx.Done():
				break loop
			}

			if time.Since(roundStartTime)-roundNetworkWaitDuration > workTime {
				// Start over, after a brief pause, with a new shuffle of the server
				// entries, and potentially some newly fetched server entries.
				break
			}

			if wasServerAffinityCandidate {

				// Don't start the next candidate until either the server affinity
				// candidate has completed (success or failure) or is still working
				// and the grace period has elapsed.

				gracePeriod := controller.config.GetParameters().Get().Duration(
					parameters.EstablishTunnelServerAffinityGracePeriod)

				if gracePeriod > 0 {
					timer := time.NewTimer(gracePeriod)
					select {
					case <-timer.C:
					case <-controller.serverAffinityDoneBroadcast:
					case <-controller.establishCtx.Done():
						timer.Stop()
						break loop
					}
					timer.Stop()
				}
			}
		}

		// Free up resources now, but don't reset until after the pause.
		iterator.Close()

		// Trigger RSL, OSL, and upgrade checks after failing to establish a
		// tunnel within parameters.EstablishTunnelWorkTime, or if there are
		// no server entries present.
		//
		// While the trigger is made after each round,
		// parameter.FetchRemoteServerListStalePeriod will limit the actual
		// frequency of fetches. Continuing to trigger allows for very long running
		// establishments to perhaps eventually succeed.
		//
		// No fetches are triggered when TargetServerEntry is specified. In that
		// case, we're only trying to connect to a specific server entry.

		if (candidateServerEntryCount == 0 ||
			time.Since(controller.establishStartTime)-totalNetworkWaitDuration > workTime) &&
			controller.config.TargetServerEntry == "" {

			controller.triggerFetches()
		}

		// After a complete iteration of candidate servers, pause before iterating again.
		// This helps avoid some busy wait loop conditions, and also allows some time for
		// network conditions to change. Also allows for fetch remote to complete,
		// in typical conditions (it isn't strictly necessary to wait for this, there will
		// be more rounds if required).

		p := controller.config.GetParameters().Get()
		timeout := prng.JitterDuration(
			p.Duration(parameters.EstablishTunnelPausePeriod),
			p.Float(parameters.EstablishTunnelPausePeriodJitter))
		p.Close()

		timer := time.NewTimer(timeout)
		select {
		case <-timer.C:
			// Retry iterating
		case <-controller.establishCtx.Done():
			timer.Stop()
			break loop
		}
		timer.Stop()

		iterator.Reset()
	}
}
  2033  
  2034  // establishTunnelWorker pulls candidates from the candidate queue, establishes
  2035  // a connection to the tunnel server, and delivers the connected tunnel to a channel.
  2036  func (controller *Controller) establishTunnelWorker() {
  2037  	defer controller.establishWaitGroup.Done()
  2038  loop:
  2039  	for candidateServerEntry := range controller.candidateServerEntries {
  2040  
  2041  		// Note: don't receive from candidateServerEntries and isStopEstablishing
  2042  		// in the same select, since we want to prioritize receiving the stop signal
  2043  		if controller.isStopEstablishing() {
  2044  			break loop
  2045  		}
  2046  
  2047  		// There may already be a tunnel to this candidate. If so, skip it.
  2048  		if controller.isActiveTunnelServerEntry(candidateServerEntry.serverEntry) {
  2049  			continue
  2050  		}
  2051  
  2052  		// TODO: we allow multiple, concurrent workers to attempt to connect to the
  2053  		// same server. This is not wasteful if the server supports several
  2054  		// different protocols, some of which may be blocked while others are not
  2055  		// blocked. Limiting protocols with [Initial]LimitTunnelProtocols may make
  2056  		// these multiple attempts redundent. Also, replay should be used only by
  2057  		// the first attempt.
  2058  
  2059  		// upstreamProxyErrorCallback will post NoticeUpstreamProxyError when the
  2060  		// tunnel dial fails due to an upstream proxy error. As the upstream proxy
  2061  		// is user configured, the error message may need to be relayed to the user.
  2062  
  2063  		// As the callback may be invoked after establishment is over (e.g., if an
  2064  		// initial dial isn't fully shutdown when ConnectTunnel returns; or a meek
  2065  		// underlying TCP connection re-dial) don't access these variables
  2066  		// directly.
  2067  		callbackCandidateServerEntry := candidateServerEntry
  2068  		callbackEstablishCtx := controller.establishCtx
  2069  
  2070  		upstreamProxyErrorCallback := func(err error) {
  2071  
  2072  			// Do not post the notice when overall establishment context is canceled or
  2073  			// timed-out: the upstream proxy connection error is likely a result of the
  2074  			// cancellation, and not a condition to be fixed by the user. In the case
  2075  			// of meek underlying TCP connection re-dials, this condition will always
  2076  			// be true; however in this case the initial dial succeeded with the
  2077  			// current upstream proxy settings, so any upstream proxy error is
  2078  			// transient.
  2079  			if callbackEstablishCtx.Err() != nil {
  2080  				return
  2081  			}
  2082  
  2083  			// Another class of non-fatal upstream proxy error arises from proxies
  2084  			// which limit permitted proxied ports. In this case, some tunnels may fail
  2085  			// due to dial port, while others may eventually succeed. To avoid this
  2086  			// class of errors, delay posting the notice. If the upstream proxy works,
  2087  			// _some_ tunnel should connect. If the upstream proxy configuration is
  2088  			// broken, the error should persist and eventually get posted.
  2089  
  2090  			p := controller.config.GetParameters().Get()
  2091  			workerPoolSize := p.Int(parameters.ConnectionWorkerPoolSize)
  2092  			minWaitDuration := p.Duration(parameters.UpstreamProxyErrorMinWaitDuration)
  2093  			maxWaitDuration := p.Duration(parameters.UpstreamProxyErrorMaxWaitDuration)
  2094  			p.Close()
  2095  
  2096  			controller.concurrentEstablishTunnelsMutex.Lock()
  2097  			establishConnectTunnelCount := controller.establishConnectTunnelCount
  2098  			controller.concurrentEstablishTunnelsMutex.Unlock()
  2099  
  2100  			// Delay UpstreamProxyErrorMinWaitDuration (excluding time spent waiting
  2101  			// for network connectivity) and then until either
  2102  			// UpstreamProxyErrorMaxWaitDuration has elapsed or, to post sooner if many
  2103  			// candidates are failing, at least workerPoolSize tunnel connection
  2104  			// attempts have completed. We infer that at least workerPoolSize
  2105  			// candidates have completed by checking that at least 2*workerPoolSize
  2106  			// candidates have started.
  2107  
  2108  			elapsedTime := time.Since(
  2109  				callbackCandidateServerEntry.adjustedEstablishStartTime)
  2110  
  2111  			if elapsedTime < minWaitDuration ||
  2112  				(elapsedTime < maxWaitDuration &&
  2113  					establishConnectTunnelCount < 2*workerPoolSize) {
  2114  				return
  2115  			}
  2116  
  2117  			NoticeUpstreamProxyError(err)
  2118  		}
  2119  
  2120  		// Select the tunnel protocol. The selection will be made at random
  2121  		// from protocols supported by the server entry, optionally limited by
  2122  		// LimitTunnelProtocols.
  2123  		//
  2124  		// When limiting concurrent resource intensive protocol connection
  2125  		// workers, and at the limit, do not select resource intensive
  2126  		// protocols since otherwise the candidate must be skipped.
  2127  		//
  2128  		// If at the limit and unabled to select a non-intensive protocol,
  2129  		// skip the candidate entirely and move on to the next. Since
  2130  		// candidates are shuffled it's likely that the next candidate is not
  2131  		// intensive. In this case, a StaggerConnectionWorkersMilliseconds
  2132  		// delay may still be incurred.
  2133  
  2134  		limitIntensiveConnectionWorkers := controller.config.GetParameters().Get().Int(
  2135  			parameters.LimitIntensiveConnectionWorkers)
  2136  
  2137  		controller.concurrentEstablishTunnelsMutex.Lock()
  2138  
  2139  		excludeIntensive := false
  2140  		if limitIntensiveConnectionWorkers > 0 &&
  2141  			controller.concurrentIntensiveEstablishTunnels >= limitIntensiveConnectionWorkers {
  2142  			excludeIntensive = true
  2143  		}
  2144  
  2145  		canReplay := func(serverEntry *protocol.ServerEntry, replayProtocol string) bool {
  2146  			return controller.protocolSelectionConstraints.canReplay(
  2147  				controller.establishConnectTunnelCount,
  2148  				excludeIntensive,
  2149  				serverEntry,
  2150  				replayProtocol)
  2151  		}
  2152  
  2153  		selectProtocol := func(serverEntry *protocol.ServerEntry) (string, bool) {
  2154  			return controller.protocolSelectionConstraints.selectProtocol(
  2155  				controller.establishConnectTunnelCount,
  2156  				excludeIntensive,
  2157  				serverEntry)
  2158  		}
  2159  
  2160  		// MakeDialParameters may return a replay instance, if the server
  2161  		// entry has a previous, recent successful connection and
  2162  		// tactics/config has not changed.
  2163  		//
  2164  		// In the first round -- and later rounds, with some probability -- of
  2165  		// establishing, ServerEntryIterator will move potential replay candidates
  2166  		// to the front of the iterator after the random shuffle, which greatly
  2167  		// prioritizes previously successful servers for that round.
  2168  		//
  2169  		// As ServerEntryIterator does not unmarshal and validate replay
  2170  		// candidate dial parameters, some potential replay candidates may
  2171  		// have expired or otherwise ineligible dial parameters; in this case
  2172  		// the candidate proceeds without replay.
  2173  		//
  2174  		// The ReplayCandidateCount tactic determines how many candidates may use
  2175  		// replay. After ReplayCandidateCount candidates of any type, replay or no,
  2176  		// replay is skipped. If ReplayCandidateCount exceeds the intial round,
  2177  		// replay may still be performed but the iterator may no longer move
  2178  		// potential replay server entries to the front. When ReplayCandidateCount
  2179  		// is set to -1, unlimited candidates may use replay.
  2180  
  2181  		dialParams, err := MakeDialParameters(
  2182  			controller.config,
  2183  			upstreamProxyErrorCallback,
  2184  			canReplay,
  2185  			selectProtocol,
  2186  			candidateServerEntry.serverEntry,
  2187  			false,
  2188  			controller.establishConnectTunnelCount,
  2189  			int(atomic.LoadInt32(&controller.establishedTunnelsCount)))
  2190  		if dialParams == nil || err != nil {
  2191  
  2192  			controller.concurrentEstablishTunnelsMutex.Unlock()
  2193  
  2194  			// MakeDialParameters returns nil/nil when the server entry is to
  2195  			// be skipped. See MakeDialParameters for skip cases and skip
  2196  			// logging. Silently fail the candidate in this case. Otherwise,
  2197  			// emit error.
  2198  			if err != nil {
  2199  				NoticeInfo("failed to make dial parameters for %s: %v",
  2200  					candidateServerEntry.serverEntry.GetDiagnosticID(),
  2201  					errors.Trace(err))
  2202  			}
  2203  
  2204  			// Unblock other candidates immediately when server affinity
  2205  			// candidate is skipped.
  2206  			if candidateServerEntry.isServerAffinityCandidate {
  2207  				close(controller.serverAffinityDoneBroadcast)
  2208  			}
  2209  
  2210  			continue
  2211  		}
  2212  
  2213  		// Increment establishConnectTunnelCount only after selectProtocol has
  2214  		// succeeded to ensure InitialLimitTunnelProtocolsCandidateCount
  2215  		// candidates use InitialLimitTunnelProtocols.
  2216  		establishConnectTunnelCount := controller.establishConnectTunnelCount
  2217  		controller.establishConnectTunnelCount += 1
  2218  
  2219  		isIntensive := protocol.TunnelProtocolIsResourceIntensive(dialParams.TunnelProtocol)
  2220  
  2221  		if isIntensive {
  2222  			controller.concurrentIntensiveEstablishTunnels += 1
  2223  			if controller.concurrentIntensiveEstablishTunnels > controller.peakConcurrentIntensiveEstablishTunnels {
  2224  				controller.peakConcurrentIntensiveEstablishTunnels = controller.concurrentIntensiveEstablishTunnels
  2225  			}
  2226  		}
  2227  		controller.concurrentEstablishTunnels += 1
  2228  		if controller.concurrentEstablishTunnels > controller.peakConcurrentEstablishTunnels {
  2229  			controller.peakConcurrentEstablishTunnels = controller.concurrentEstablishTunnels
  2230  		}
  2231  
  2232  		controller.concurrentEstablishTunnelsMutex.Unlock()
  2233  
  2234  		// Apply stagger only now that we're past MakeDialParameters and
  2235  		// protocol selection logic which may have caused the candidate to be
  2236  		// skipped. The stagger logic delays dialing, and we don't want to
		// incur that delay when skipping.
  2238  		//
  2239  		// Locking staggerMutex serializes staggers, so that multiple workers
  2240  		// don't simply sleep in parallel.
  2241  		//
  2242  		// The stagger is applied when establishConnectTunnelCount > 0 -- that
  2243  		// is, for all but the first dial.
  2244  
  2245  		p := controller.config.GetParameters().Get()
  2246  		staggerPeriod := p.Duration(parameters.StaggerConnectionWorkersPeriod)
  2247  		staggerJitter := p.Float(parameters.StaggerConnectionWorkersJitter)
  2248  		p.Close()
  2249  
  2250  		if establishConnectTunnelCount > 0 && staggerPeriod != 0 {
  2251  			controller.staggerMutex.Lock()
  2252  			timer := time.NewTimer(prng.JitterDuration(staggerPeriod, staggerJitter))
  2253  			select {
  2254  			case <-timer.C:
  2255  			case <-controller.establishCtx.Done():
  2256  			}
  2257  			timer.Stop()
  2258  			controller.staggerMutex.Unlock()
  2259  		}
  2260  
  2261  		// ConnectTunnel will allocate significant memory, so first attempt to
  2262  		// reclaim as much as possible.
  2263  		DoGarbageCollection()
  2264  
  2265  		tunnel, err := ConnectTunnel(
  2266  			controller.establishCtx,
  2267  			controller.config,
  2268  			candidateServerEntry.adjustedEstablishStartTime,
  2269  			dialParams)
  2270  
  2271  		controller.concurrentEstablishTunnelsMutex.Lock()
  2272  		if isIntensive {
  2273  			controller.concurrentIntensiveEstablishTunnels -= 1
  2274  		}
  2275  		controller.concurrentEstablishTunnels -= 1
  2276  		controller.concurrentEstablishTunnelsMutex.Unlock()
  2277  
  2278  		// Periodically emit memory metrics during the establishment cycle.
  2279  		if !controller.isStopEstablishing() {
  2280  			emitMemoryMetrics()
  2281  		}
  2282  
  2283  		// Immediately reclaim memory allocated by the establishment. In the case
  2284  		// of failure, first clear the reference to the tunnel. In the case of
  2285  		// success, the garbage collection may still be effective as the initial
  2286  		// phases of some protocols involve significant memory allocation that
  2287  		// could now be reclaimed.
  2288  		if err != nil {
  2289  			tunnel = nil
  2290  		}
  2291  		DoGarbageCollection()
  2292  
  2293  		if err != nil {
  2294  
  2295  			// Unblock other candidates immediately when server affinity
  2296  			// candidate fails.
  2297  			if candidateServerEntry.isServerAffinityCandidate {
  2298  				close(controller.serverAffinityDoneBroadcast)
  2299  			}
  2300  
  2301  			// Before emitting error, check if establish interrupted, in which
  2302  			// case the error is noise.
  2303  			if controller.isStopEstablishing() {
  2304  				break loop
  2305  			}
  2306  
  2307  			NoticeInfo("failed to connect to %s: %v",
  2308  				candidateServerEntry.serverEntry.GetDiagnosticID(),
  2309  				errors.Trace(err))
  2310  
  2311  			continue
  2312  		}
  2313  
  2314  		// Deliver connected tunnel.
  2315  		// Don't block. Assumes the receiver has a buffer large enough for
  2316  		// the number of desired tunnels. If there's no room, the tunnel must
  2317  		// not be required so it's discarded.
  2318  		select {
  2319  		case controller.connectedTunnels <- tunnel:
  2320  		default:
  2321  			controller.discardTunnel(tunnel)
  2322  
  2323  			// Clear the reference to this discarded tunnel and immediately run
  2324  			// a garbage collection to reclaim its memory.
  2325  			tunnel = nil
  2326  			DoGarbageCollection()
  2327  		}
  2328  
  2329  		// Unblock other candidates only after delivering when
  2330  		// server affinity candidate succeeds.
  2331  		if candidateServerEntry.isServerAffinityCandidate {
  2332  			close(controller.serverAffinityDoneBroadcast)
  2333  		}
  2334  	}
  2335  }
  2336  
  2337  func (controller *Controller) isStopEstablishing() bool {
  2338  	select {
  2339  	case <-controller.establishCtx.Done():
  2340  		return true
  2341  	default:
  2342  	}
  2343  	return false
  2344  }