github.com/psiphon-labs/psiphon-tunnel-core@v2.0.28+incompatible/psiphon/controller.go (about)

     1  /*
     2   * Copyright (c) 2015, Psiphon Inc.
     3   * All rights reserved.
     4   *
     5   * This program is free software: you can redistribute it and/or modify
     6   * it under the terms of the GNU General Public License as published by
     7   * the Free Software Foundation, either version 3 of the License, or
     8   * (at your option) any later version.
     9   *
    10   * This program is distributed in the hope that it will be useful,
    11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
    12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13   * GNU General Public License for more details.
    14   *
    15   * You should have received a copy of the GNU General Public License
    16   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17   *
    18   */
    19  
    20  // Package psiphon implements the core tunnel functionality of a Psiphon client.
    21  // The main function is RunForever, which runs a Controller that obtains lists of
    22  // servers, establishes tunnel connections, and runs local proxies through which
    23  // tunneled traffic may be sent.
    24  package psiphon
    25  
    26  import (
    27  	"context"
    28  	"fmt"
    29  	"math/rand"
    30  	"net"
    31  	"runtime"
    32  	"sync"
    33  	"sync/atomic"
    34  	"time"
    35  
    36  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
    37  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
    38  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/parameters"
    39  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/prng"
    40  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/protocol"
    41  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/resolver"
    42  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/tun"
    43  	lrucache "github.com/cognusion/go-cache-lru"
    44  )
    45  
    46  // Controller is a tunnel lifecycle coordinator. It manages lists of servers to
    47  // connect to; establishes and monitors tunnels; and runs local proxies which
    48  // route traffic through the tunnels.
    49  type Controller struct {
    50  	config                                  *Config
    51  	runCtx                                  context.Context
    52  	stopRunning                             context.CancelFunc
    53  	runWaitGroup                            *sync.WaitGroup
    54  	connectedTunnels                        chan *Tunnel
    55  	failedTunnels                           chan *Tunnel
    56  	tunnelMutex                             sync.Mutex
    57  	establishedOnce                         bool
    58  	tunnelPoolSize                          int
    59  	tunnels                                 []*Tunnel
    60  	nextTunnel                              int
    61  	isEstablishing                          bool
    62  	establishStartTime                      time.Time
    63  	protocolSelectionConstraints            *protocolSelectionConstraints
    64  	concurrentEstablishTunnelsMutex         sync.Mutex
    65  	establishConnectTunnelCount             int
    66  	concurrentEstablishTunnels              int
    67  	concurrentIntensiveEstablishTunnels     int
    68  	peakConcurrentEstablishTunnels          int
    69  	peakConcurrentIntensiveEstablishTunnels int
    70  	establishCtx                            context.Context
    71  	stopEstablish                           context.CancelFunc
    72  	establishWaitGroup                      *sync.WaitGroup
    73  	establishedTunnelsCount                 int32
    74  	candidateServerEntries                  chan *candidateServerEntry
    75  	untunneledDialConfig                    *DialConfig
    76  	untunneledSplitTunnelClassifications    *lrucache.Cache
    77  	splitTunnelClassificationTTL            time.Duration
    78  	splitTunnelClassificationMaxEntries     int
    79  	signalFetchCommonRemoteServerList       chan struct{}
    80  	signalFetchObfuscatedServerLists        chan struct{}
    81  	signalDownloadUpgrade                   chan string
    82  	signalReportServerEntries               chan *serverEntriesReportRequest
    83  	signalReportConnected                   chan struct{}
    84  	signalRestartEstablishing               chan struct{}
    85  	serverAffinityDoneBroadcast             chan struct{}
    86  	packetTunnelClient                      *tun.Client
    87  	packetTunnelTransport                   *PacketTunnelTransport
    88  	staggerMutex                            sync.Mutex
    89  	resolver                                *resolver.Resolver
    90  }
    91  
    92  // NewController initializes a new controller.
    93  func NewController(config *Config) (controller *Controller, err error) {
    94  
    95  	if !config.IsCommitted() {
    96  		return nil, errors.TraceNew("uncommitted config")
    97  	}
    98  
    99  	// Needed by regen, at least
   100  	rand.Seed(int64(time.Now().Nanosecond()))
   101  
   102  	// The session ID for the Psiphon server API is used across all
   103  	// tunnels established by the controller.
   104  	NoticeSessionId(config.SessionID)
   105  
   106  	// Attempt to apply any valid, local stored tactics. The pre-done context
   107  	// ensures no tactics request is attempted now.
   108  	doneContext, cancelFunc := context.WithCancel(context.Background())
   109  	cancelFunc()
   110  	GetTactics(doneContext, config)
   111  
   112  	p := config.GetParameters().Get()
   113  	splitTunnelClassificationTTL :=
   114  		p.Duration(parameters.SplitTunnelClassificationTTL)
   115  	splitTunnelClassificationMaxEntries :=
   116  		p.Int(parameters.SplitTunnelClassificationMaxEntries)
   117  
   118  	controller = &Controller{
   119  		config:       config,
   120  		runWaitGroup: new(sync.WaitGroup),
   121  		// connectedTunnels and failedTunnels buffer sizes are large enough to
   122  		// receive full pools of tunnels without blocking. Senders should not block.
   123  		connectedTunnels: make(chan *Tunnel, MAX_TUNNEL_POOL_SIZE),
   124  		failedTunnels:    make(chan *Tunnel, MAX_TUNNEL_POOL_SIZE),
   125  		tunnelPoolSize:   TUNNEL_POOL_SIZE,
   126  		tunnels:          make([]*Tunnel, 0),
   127  		establishedOnce:  false,
   128  		isEstablishing:   false,
   129  
   130  		untunneledSplitTunnelClassifications: lrucache.NewWithLRU(
   131  			splitTunnelClassificationTTL,
   132  			1*time.Minute,
   133  			splitTunnelClassificationMaxEntries),
   134  
   135  		// TODO: Add a buffer of 1 so we don't miss a signal while receiver is
   136  		// starting? Trade-off is potential back-to-back fetch remotes. As-is,
   137  		// establish will eventually signal another fetch remote.
   138  		signalFetchCommonRemoteServerList: make(chan struct{}),
   139  		signalFetchObfuscatedServerLists:  make(chan struct{}),
   140  		signalDownloadUpgrade:             make(chan string),
   141  		signalReportConnected:             make(chan struct{}),
   142  
   143  		// Using a buffer of 1 to ensure there's no race between the first signal
   144  		// sent and a channel receiver initializing; a side effect is that this
   145  		// allows 1 additional scan to enqueue while a scan is in progress, possibly
   146  		// resulting in one unnecessary scan.
   147  		signalReportServerEntries: make(chan *serverEntriesReportRequest, 1),
   148  
   149  		// signalRestartEstablishing has a buffer of 1 to ensure sending the
   150  		// signal doesn't block and receiving won't miss a signal.
   151  		signalRestartEstablishing: make(chan struct{}, 1),
   152  	}
   153  
   154  	// Initialize untunneledDialConfig, used by untunneled dials including
   155  	// remote server list and upgrade downloads.
   156  	controller.untunneledDialConfig = &DialConfig{
   157  		UpstreamProxyURL: controller.config.UpstreamProxyURL,
   158  		CustomHeaders:    controller.config.CustomHeaders,
   159  		DeviceBinder:     controller.config.deviceBinder,
   160  		IPv6Synthesizer:  controller.config.IPv6Synthesizer,
   161  		ResolveIP: func(ctx context.Context, hostname string) ([]net.IP, error) {
   162  			IPs, err := UntunneledResolveIP(
   163  				ctx, controller.config, controller.resolver, hostname)
   164  			if err != nil {
   165  				return nil, errors.Trace(err)
   166  			}
   167  			return IPs, nil
   168  		},
   169  		TrustedCACertificatesFilename: controller.config.TrustedCACertificatesFilename,
   170  	}
   171  
   172  	if config.PacketTunnelTunFileDescriptor > 0 {
   173  
   174  		// Run a packet tunnel client. The lifetime of the tun.Client is the
   175  		// lifetime of the Controller, so it exists across tunnel establishments
   176  		// and reestablishments. The PacketTunnelTransport provides a layer
   177  		// that presents a continuosuly existing transport to the tun.Client;
   178  		// it's set to use new SSH channels after new SSH tunnel establishes.
   179  
   180  		packetTunnelTransport := NewPacketTunnelTransport()
   181  
   182  		packetTunnelClient, err := tun.NewClient(&tun.ClientConfig{
   183  			Logger:            NoticeCommonLogger(),
   184  			TunFileDescriptor: config.PacketTunnelTunFileDescriptor,
   185  			Transport:         packetTunnelTransport,
   186  		})
   187  		if err != nil {
   188  			return nil, errors.Trace(err)
   189  		}
   190  
   191  		controller.packetTunnelClient = packetTunnelClient
   192  		controller.packetTunnelTransport = packetTunnelTransport
   193  	}
   194  
   195  	return controller, nil
   196  }
   197  
   198  // Run executes the controller. Run exits if a controller
   199  // component fails or the parent context is canceled.
   200  func (controller *Controller) Run(ctx context.Context) {
   201  
   202  	if controller.config.LimitCPUThreads {
   203  		runtime.GOMAXPROCS(1)
   204  	}
   205  
   206  	pprofRun()
   207  
   208  	// Ensure fresh repetitive notice state for each run, so the
   209  	// client will always get an AvailableEgressRegions notice,
   210  	// an initial instance of any repetitive error notice, etc.
   211  	ResetRepetitiveNotices()
   212  
   213  	runCtx, stopRunning := context.WithCancel(ctx)
   214  	defer stopRunning()
   215  
   216  	controller.runCtx = runCtx
   217  	controller.stopRunning = stopRunning
   218  
   219  	// Start components
   220  
   221  	// Initialize a single resolver to be used by all dials. Sharing a single
   222  	// resolver ensures cached results are shared, and that network state
   223  	// query overhead is amortized over all dials. Multiple dials can resolve
   224  	// domain concurrently.
   225  	//
   226  	// config.SetResolver makes this resolver available to MakeDialParameters.
   227  	controller.resolver = NewResolver(controller.config, true)
   228  	defer controller.resolver.Stop()
   229  	controller.config.SetResolver(controller.resolver)
   230  
   231  	// TODO: IPv6 support
   232  	var listenIP string
   233  	if controller.config.ListenInterface == "" {
   234  		listenIP = "127.0.0.1"
   235  	} else if controller.config.ListenInterface == "any" {
   236  		listenIP = "0.0.0.0"
   237  	} else {
   238  		IPv4Address, _, err := common.GetInterfaceIPAddresses(controller.config.ListenInterface)
   239  		if err == nil && IPv4Address == nil {
   240  			err = fmt.Errorf("no IPv4 address for interface %s", controller.config.ListenInterface)
   241  		}
   242  		if err != nil {
   243  			NoticeError("error getting listener IP: %v", errors.Trace(err))
   244  			return
   245  		}
   246  		listenIP = IPv4Address.String()
   247  	}
   248  
   249  	if !controller.config.DisableLocalSocksProxy {
   250  		socksProxy, err := NewSocksProxy(controller.config, controller, listenIP)
   251  		if err != nil {
   252  			NoticeError("error initializing local SOCKS proxy: %v", errors.Trace(err))
   253  			return
   254  		}
   255  		defer socksProxy.Close()
   256  	}
   257  
   258  	if !controller.config.DisableLocalHTTPProxy {
   259  		httpProxy, err := NewHttpProxy(controller.config, controller, listenIP)
   260  		if err != nil {
   261  			NoticeError("error initializing local HTTP proxy: %v", errors.Trace(err))
   262  			return
   263  		}
   264  		defer httpProxy.Close()
   265  	}
   266  
   267  	if !controller.config.DisableRemoteServerListFetcher {
   268  
   269  		if controller.config.RemoteServerListURLs != nil {
   270  			controller.runWaitGroup.Add(1)
   271  			go controller.remoteServerListFetcher(
   272  				"common",
   273  				FetchCommonRemoteServerList,
   274  				controller.signalFetchCommonRemoteServerList)
   275  		}
   276  
   277  		if controller.config.ObfuscatedServerListRootURLs != nil {
   278  			controller.runWaitGroup.Add(1)
   279  			go controller.remoteServerListFetcher(
   280  				"obfuscated",
   281  				FetchObfuscatedServerLists,
   282  				controller.signalFetchObfuscatedServerLists)
   283  		}
   284  	}
   285  
   286  	if controller.config.UpgradeDownloadURLs != nil {
   287  		controller.runWaitGroup.Add(1)
   288  		go controller.upgradeDownloader()
   289  	}
   290  
   291  	controller.runWaitGroup.Add(1)
   292  	go controller.serverEntriesReporter()
   293  
   294  	controller.runWaitGroup.Add(1)
   295  	go controller.connectedReporter()
   296  
   297  	controller.runWaitGroup.Add(1)
   298  	go controller.establishTunnelWatcher()
   299  
   300  	controller.runWaitGroup.Add(1)
   301  	go controller.runTunnels()
   302  
   303  	if controller.packetTunnelClient != nil {
   304  		controller.packetTunnelClient.Start()
   305  	}
   306  
   307  	// Wait while running
   308  
   309  	<-controller.runCtx.Done()
   310  	NoticeInfo("controller stopped")
   311  
   312  	if controller.packetTunnelClient != nil {
   313  		controller.packetTunnelClient.Stop()
   314  	}
   315  
   316  	// All workers -- runTunnels, establishment workers, and auxilliary
   317  	// workers such as fetch remote server list and untunneled uprade
   318  	// download -- operate with the controller run context and will all
   319  	// be interrupted when the run context is done.
   320  
   321  	controller.runWaitGroup.Wait()
   322  
   323  	NoticeInfo("exiting controller")
   324  
   325  	NoticeExiting()
   326  }
   327  
   328  // SignalComponentFailure notifies the controller that an associated component has failed.
   329  // This will terminate the controller.
   330  func (controller *Controller) SignalComponentFailure() {
   331  	NoticeWarning("controller shutdown due to component failure")
   332  	controller.stopRunning()
   333  }
   334  
   335  // SetDynamicConfig overrides the sponsor ID and authorizations fields of the
   336  // Controller config with the input values. The new values will be used in the
   337  // next tunnel connection.
   338  func (controller *Controller) SetDynamicConfig(sponsorID string, authorizations []string) {
   339  	controller.config.SetDynamicConfig(sponsorID, authorizations)
   340  }
   341  
   342  // TerminateNextActiveTunnel terminates the active tunnel, which will initiate
   343  // establishment of a new tunnel.
   344  func (controller *Controller) TerminateNextActiveTunnel() {
   345  	tunnel := controller.getNextActiveTunnel()
   346  	if tunnel != nil {
   347  		controller.SignalTunnelFailure(tunnel)
   348  		NoticeInfo("terminated tunnel: %s", tunnel.dialParams.ServerEntry.GetDiagnosticID())
   349  	}
   350  }
   351  
   352  // ExportExchangePayload creates a payload for client-to-client server
   353  // connection info exchange. See the comment for psiphon.ExportExchangePayload
   354  // for more details.
   355  func (controller *Controller) ExportExchangePayload() string {
   356  	return ExportExchangePayload(controller.config)
   357  }
   358  
   359  // ImportExchangePayload imports a payload generated by ExportExchangePayload.
   360  // See the comment for psiphon.ImportExchangePayload for more details about
   361  // the import.
   362  //
   363  // When the import is successful, a signal is set to trigger a restart any
   364  // establishment in progress. This will cause the newly imported server entry
   365  // to be prioritized, which it otherwise would not be in later establishment
   366  // rounds. The establishment process continues after ImportExchangePayload
   367  // returns.
   368  //
   369  // If the client already has a connected tunnel, or a tunnel connection is
   370  // established concurrently with the import, the signal has no effect as the
   371  // overall goal is establish _any_ connection.
   372  func (controller *Controller) ImportExchangePayload(payload string) bool {
   373  
   374  	// Race condition: if a new tunnel connection is established concurrently
   375  	// with the import, either that tunnel's server entry of the imported server
   376  	// entry may end up as the affinity server.
   377  
   378  	ok := ImportExchangePayload(controller.config, payload)
   379  	if !ok {
   380  		return false
   381  	}
   382  
   383  	select {
   384  	case controller.signalRestartEstablishing <- struct{}{}:
   385  	default:
   386  	}
   387  
   388  	return true
   389  }
   390  
   391  // remoteServerListFetcher fetches an out-of-band list of server entries
   392  // for more tunnel candidates. It fetches when signalled, with retries
   393  // on failure.
   394  func (controller *Controller) remoteServerListFetcher(
   395  	name string,
   396  	fetcher RemoteServerListFetcher,
   397  	signal <-chan struct{}) {
   398  
   399  	defer controller.runWaitGroup.Done()
   400  
   401  	var lastFetchTime time.Time
   402  
   403  fetcherLoop:
   404  	for {
   405  		// Wait for a signal before fetching
   406  		select {
   407  		case <-signal:
   408  		case <-controller.runCtx.Done():
   409  			break fetcherLoop
   410  		}
   411  
   412  		// Skip fetch entirely (i.e., send no request at all, even when ETag would save
   413  		// on response size) when a recent fetch was successful
   414  
   415  		stalePeriod := controller.config.GetParameters().Get().Duration(
   416  			parameters.FetchRemoteServerListStalePeriod)
   417  
   418  		if !lastFetchTime.IsZero() &&
   419  			lastFetchTime.Add(stalePeriod).After(time.Now()) {
   420  			continue
   421  		}
   422  
   423  	retryLoop:
   424  		for attempt := 0; ; attempt++ {
   425  			// Don't attempt to fetch while there is no network connectivity,
   426  			// to avoid alert notice noise.
   427  			if !WaitForNetworkConnectivity(
   428  				controller.runCtx,
   429  				controller.config.NetworkConnectivityChecker) {
   430  				break fetcherLoop
   431  			}
   432  
   433  			// Pick any active tunnel and make the next fetch attempt. If there's
   434  			// no active tunnel, the untunneledDialConfig will be used.
   435  			tunnel := controller.getNextActiveTunnel()
   436  
   437  			err := fetcher(
   438  				controller.runCtx,
   439  				controller.config,
   440  				attempt,
   441  				tunnel,
   442  				controller.untunneledDialConfig)
   443  
   444  			if err == nil {
   445  				lastFetchTime = time.Now()
   446  				break retryLoop
   447  			}
   448  
   449  			NoticeWarning("failed to fetch %s remote server list: %v",
   450  				name, errors.Trace(err))
   451  
   452  			retryPeriod := controller.config.GetParameters().Get().Duration(
   453  				parameters.FetchRemoteServerListRetryPeriod)
   454  
   455  			timer := time.NewTimer(retryPeriod)
   456  			select {
   457  			case <-timer.C:
   458  			case <-controller.runCtx.Done():
   459  				timer.Stop()
   460  				break fetcherLoop
   461  			}
   462  		}
   463  	}
   464  
   465  	NoticeInfo("exiting %s remote server list fetcher", name)
   466  }
   467  
   468  // upgradeDownloader makes periodic attempts to complete a client upgrade
   469  // download. DownloadUpgrade() is resumable, so each attempt has potential for
   470  // getting closer to completion, even in conditions where the download or
   471  // tunnel is repeatedly interrupted.
   472  // An upgrade download is triggered by either a handshake response indicating
   473  // that a new version is available; or after failing to connect, in which case
   474  // it's useful to check, out-of-band, for an upgrade with new circumvention
   475  // capabilities.
   476  // Once the download operation completes successfully, the downloader exits
   477  // and is not run again: either there is not a newer version, or the upgrade
   478  // has been downloaded and is ready to be applied.
   479  // We're assuming that the upgrade will be applied and the entire system
   480  // restarted before another upgrade is to be downloaded.
   481  //
   482  // TODO: refactor upgrade downloader and remote server list fetcher to use
   483  // common code (including the resumable download routines).
   484  //
   485  func (controller *Controller) upgradeDownloader() {
   486  	defer controller.runWaitGroup.Done()
   487  
   488  	var lastDownloadTime time.Time
   489  
   490  downloadLoop:
   491  	for {
   492  		// Wait for a signal before downloading
   493  		var handshakeVersion string
   494  		select {
   495  		case handshakeVersion = <-controller.signalDownloadUpgrade:
   496  		case <-controller.runCtx.Done():
   497  			break downloadLoop
   498  		}
   499  
   500  		stalePeriod := controller.config.GetParameters().Get().Duration(
   501  			parameters.FetchUpgradeStalePeriod)
   502  
   503  		// Unless handshake is explicitly advertizing a new version, skip
   504  		// checking entirely when a recent download was successful.
   505  		if handshakeVersion == "" &&
   506  			!lastDownloadTime.IsZero() &&
   507  			lastDownloadTime.Add(stalePeriod).After(time.Now()) {
   508  			continue
   509  		}
   510  
   511  	retryLoop:
   512  		for attempt := 0; ; attempt++ {
   513  			// Don't attempt to download while there is no network connectivity,
   514  			// to avoid alert notice noise.
   515  			if !WaitForNetworkConnectivity(
   516  				controller.runCtx,
   517  				controller.config.NetworkConnectivityChecker) {
   518  				break downloadLoop
   519  			}
   520  
   521  			// Pick any active tunnel and make the next download attempt. If there's
   522  			// no active tunnel, the untunneledDialConfig will be used.
   523  			tunnel := controller.getNextActiveTunnel()
   524  
   525  			err := DownloadUpgrade(
   526  				controller.runCtx,
   527  				controller.config,
   528  				attempt,
   529  				handshakeVersion,
   530  				tunnel,
   531  				controller.untunneledDialConfig)
   532  
   533  			if err == nil {
   534  				lastDownloadTime = time.Now()
   535  				break retryLoop
   536  			}
   537  
   538  			NoticeWarning("failed to download upgrade: %v", errors.Trace(err))
   539  
   540  			timeout := controller.config.GetParameters().Get().Duration(
   541  				parameters.FetchUpgradeRetryPeriod)
   542  
   543  			timer := time.NewTimer(timeout)
   544  			select {
   545  			case <-timer.C:
   546  			case <-controller.runCtx.Done():
   547  				timer.Stop()
   548  				break downloadLoop
   549  			}
   550  		}
   551  	}
   552  
   553  	NoticeInfo("exiting upgrade downloader")
   554  }
   555  
   556  type serverEntriesReportRequest struct {
   557  	constraints   *protocolSelectionConstraints
   558  	awaitResponse chan *serverEntriesReportResponse
   559  }
   560  
   561  type serverEntriesReportResponse struct {
   562  	err                              error
   563  	candidates                       int
   564  	initialCandidates                int
   565  	initialCandidatesAnyEgressRegion int
   566  	availableEgressRegions           []string
   567  }
   568  
   569  // serverEntriesReporter performs scans over all server entries to report on
   570  // available tunnel candidates, subject to protocol selection constraints, and
   571  // available egress regions.
   572  //
   573  // Because scans may be slow, depending on the client device and server entry
   574  // list size, serverEntriesReporter is used to perform asychronous, background
   575  // operations that would otherwise block establishment. This includes emitting
   576  // diagnotic notices that are informational (CandidateServers) or which do not
   577  // need to emit before establishment starts (AvailableEgressRegions).
   578  //
   579  // serverEntriesReporter also serves to combine these scans, which would
   580  // otherwise be logically independent, due to the performance impact of scans.
   581  //
   582  // The underlying datastore implementation _may_ block write transactions
   583  // while there are open read transactions. For example, bolt write
   584  // transactions which need to  re-map the data file (when the datastore grows)
   585  // will block on open read transactions. In these scenarios, a slow scan will
   586  // still block other operations.
   587  //
   588  // serverEntriesReporter runs beyond the establishment phase, since it's
   589  // important for notices such as AvailableEgressRegions to eventually emit
   590  // even if already established. serverEntriesReporter scans are cancellable,
   591  // so controller shutdown is not blocked by slow scans.
   592  //
   593  // In some special cases, establishment cannot begin without candidate counts
   594  // up front. In these cases only, the request contains a non-nil
   595  // awaitResponse, a channel which is used by the requester to block until the
   596  // scan is complete and the candidate counts are available.
   597  func (controller *Controller) serverEntriesReporter() {
   598  	defer controller.runWaitGroup.Done()
   599  
   600  loop:
   601  	for {
   602  
   603  		var request *serverEntriesReportRequest
   604  
   605  		select {
   606  		case request = <-controller.signalReportServerEntries:
   607  		case <-controller.runCtx.Done():
   608  			break loop
   609  		}
   610  
   611  		egressRegion := controller.config.EgressRegion
   612  		constraints := request.constraints
   613  
   614  		var response serverEntriesReportResponse
   615  
   616  		regions := make(map[string]bool)
   617  
   618  		callback := func(serverEntry *protocol.ServerEntry) bool {
   619  
   620  			// In establishment, excludeIntensive depends on what set of protocols are
   621  			// already being dialed. For these reports, don't exclude intensive
   622  			// protocols as any intensive candidate can always be an available
   623  			// candidate at some point.
   624  			excludeIntensive := false
   625  
   626  			isInitialCandidate := constraints.isInitialCandidate(excludeIntensive, serverEntry)
   627  			isCandidate := constraints.isCandidate(excludeIntensive, serverEntry)
   628  
   629  			if isInitialCandidate {
   630  				response.initialCandidatesAnyEgressRegion += 1
   631  			}
   632  
   633  			if egressRegion == "" || serverEntry.Region == egressRegion {
   634  				if isInitialCandidate {
   635  					response.initialCandidates += 1
   636  				}
   637  				if isCandidate {
   638  					response.candidates += 1
   639  				}
   640  			}
   641  
   642  			isAvailable := isCandidate
   643  			if constraints.hasInitialProtocols() {
   644  				// Available egress regions is subject to an initial limit constraint, if
   645  				// present: see AvailableEgressRegions comment in launchEstablishing.
   646  				isAvailable = isInitialCandidate
   647  			}
   648  
   649  			if isAvailable {
   650  				// Ignore server entries with no region field.
   651  				if serverEntry.Region != "" {
   652  					regions[serverEntry.Region] = true
   653  				}
   654  			}
   655  
   656  			select {
   657  			case <-controller.runCtx.Done():
   658  				// Don't block controller shutdown: cancel the scan.
   659  				return false
   660  			default:
   661  				return true
   662  			}
   663  		}
   664  
   665  		startTime := time.Now()
   666  
   667  		response.err = ScanServerEntries(callback)
   668  
   669  		// Report this duration in CandidateServers as an indication of datastore
   670  		// performance.
   671  		duration := time.Since(startTime)
   672  
   673  		response.availableEgressRegions = make([]string, 0, len(regions))
   674  		for region := range regions {
   675  			response.availableEgressRegions = append(response.availableEgressRegions, region)
   676  		}
   677  
   678  		if response.err != nil {
   679  
   680  			// For diagnostics, we'll post this even when cancelled due to shutdown.
   681  			NoticeWarning("ScanServerEntries failed: %v", errors.Trace(response.err))
   682  
   683  			// Continue and send error reponse. Clear any partial data to avoid
   684  			// misuse.
   685  			response.candidates = 0
   686  			response.initialCandidates = 0
   687  			response.initialCandidatesAnyEgressRegion = 0
   688  			response.availableEgressRegions = []string{}
   689  		}
   690  
   691  		if request.awaitResponse != nil {
   692  			select {
   693  			case request.awaitResponse <- &response:
   694  			case <-controller.runCtx.Done():
   695  				// The receiver may be gone when shutting down.
   696  			}
   697  		}
   698  
   699  		if response.err == nil {
   700  
   701  			NoticeCandidateServers(
   702  				controller.config.EgressRegion,
   703  				controller.protocolSelectionConstraints,
   704  				response.initialCandidates,
   705  				response.candidates,
   706  				duration)
   707  
   708  			NoticeAvailableEgressRegions(
   709  				response.availableEgressRegions)
   710  		}
   711  	}
   712  
   713  	NoticeInfo("exiting server entries reporter")
   714  }
   715  
   716  // signalServerEntriesReporter triggers a new server entry report. Set
   717  // request.awaitResponse to obtain the report output. When awaitResponse is
   718  // set, signalServerEntriesReporter blocks until the reporter receives the
   719  // request, guaranteeing the new report runs. Otherwise, the report is
   720  // considered to be informational and may or may not run, depending on whether
   721  // another run is already in progress.
   722  func (controller *Controller) signalServerEntriesReporter(request *serverEntriesReportRequest) {
   723  
   724  	if request.awaitResponse == nil {
   725  		select {
   726  		case controller.signalReportServerEntries <- request:
   727  		default:
   728  		}
   729  	} else {
   730  		controller.signalReportServerEntries <- request
   731  	}
   732  }
   733  
   734  // connectedReporter sends periodic "connected" requests to the Psiphon API.
   735  // These requests are for server-side unique user stats calculation. See the
   736  // comment in DoConnectedRequest for a description of the request mechanism.
   737  //
   738  // To correctly count daily unique users, only one connected request is made
   739  // across all simultaneous multi-tunnels; and the connected request is
   740  // repeated every 24h.
   741  //
   742  // The signalReportConnected mechanism is used to trigger a connected request
   743  // immediately after a reconnect. While strictly only one connected request
   744  // per 24h is required in order to count daily unique users, the connected
   745  // request also delivers the establishment duration metric (which includes
   746  // time elapsed performing the handshake request) and additional fragmentation
   747  // metrics; these metrics are measured for each tunnel.
   748  func (controller *Controller) connectedReporter() {
   749  	defer controller.runWaitGroup.Done()
   750  
   751  	// session is nil when DisableApi is set
   752  	if controller.config.DisableApi {
   753  		return
   754  	}
   755  
   756  	select {
   757  	case <-controller.signalReportConnected:
   758  		// Make the initial connected request
   759  	case <-controller.runCtx.Done():
   760  		return
   761  	}
   762  
   763  loop:
   764  	for {
   765  
   766  		// Pick any active tunnel and make the next connected request. No error is
   767  		// logged if there's no active tunnel, as that's not an unexpected
   768  		// condition.
   769  		reported := false
   770  		tunnel := controller.getNextActiveTunnel()
   771  		if tunnel != nil {
   772  			err := tunnel.serverContext.DoConnectedRequest()
   773  			if err == nil {
   774  				reported = true
   775  			} else {
   776  				NoticeWarning("failed to make connected request: %v",
   777  					errors.Trace(err))
   778  			}
   779  		}
   780  
   781  		// Schedule the next connected request and wait. This duration is not a
   782  		// dynamic ClientParameter as the daily unique user stats logic specifically
   783  		// requires a "connected" request no more or less often than every 24h.
   784  		var duration time.Duration
   785  		if reported {
   786  			duration = 24 * time.Hour
   787  		} else {
   788  			duration = controller.config.GetParameters().Get().Duration(
   789  				parameters.PsiphonAPIConnectedRequestRetryPeriod)
   790  		}
   791  		timer := time.NewTimer(duration)
   792  		doBreak := false
   793  		select {
   794  		case <-controller.signalReportConnected:
   795  		case <-timer.C:
   796  			// Make another connected request
   797  		case <-controller.runCtx.Done():
   798  			doBreak = true
   799  		}
   800  		timer.Stop()
   801  		if doBreak {
   802  			break loop
   803  		}
   804  	}
   805  
   806  	NoticeInfo("exiting connected reporter")
   807  }
   808  
   809  func (controller *Controller) signalConnectedReporter() {
   810  
   811  	// session is nil when DisableApi is set
   812  	if controller.config.DisableApi {
   813  		return
   814  	}
   815  
   816  	select {
   817  	case controller.signalReportConnected <- struct{}{}:
   818  	default:
   819  	}
   820  }
   821  
   822  // establishTunnelWatcher terminates the controller if a tunnel
   823  // has not been established in the configured time period. This
   824  // is regardless of how many tunnels are presently active -- meaning
   825  // that if an active tunnel was established and lost the controller
   826  // is left running (to re-establish).
   827  func (controller *Controller) establishTunnelWatcher() {
   828  	defer controller.runWaitGroup.Done()
   829  
   830  	timeout := controller.config.GetParameters().Get().Duration(
   831  		parameters.EstablishTunnelTimeout)
   832  
   833  	if timeout > 0 {
   834  		timer := time.NewTimer(timeout)
   835  		defer timer.Stop()
   836  
   837  		select {
   838  		case <-timer.C:
   839  			if !controller.hasEstablishedOnce() {
   840  				NoticeEstablishTunnelTimeout(timeout)
   841  				controller.SignalComponentFailure()
   842  			}
   843  		case <-controller.runCtx.Done():
   844  		}
   845  	}
   846  
   847  	NoticeInfo("exiting establish tunnel watcher")
   848  }
   849  
   850  // runTunnels is the controller tunnel management main loop. It starts and stops
   851  // establishing tunnels based on the target tunnel pool size and the current size
   852  // of the pool. Tunnels are established asynchronously using worker goroutines.
   853  //
   854  // When there are no server entries for the target region/protocol, the
   855  // establishCandidateGenerator will yield no candidates and wait before
   856  // trying again. In the meantime, a remote server entry fetch may supply
   857  // valid candidates.
   858  //
   859  // When a tunnel is established, it's added to the active pool. The tunnel's
   860  // operateTunnel goroutine monitors the tunnel.
   861  //
   862  // When a tunnel fails, it's removed from the pool and the establish process is
   863  // restarted to fill the pool.
   864  func (controller *Controller) runTunnels() {
   865  	defer controller.runWaitGroup.Done()
   866  
   867  	// Start running
   868  
   869  	controller.startEstablishing()
   870  loop:
   871  	for {
   872  		select {
   873  
   874  		case <-controller.signalRestartEstablishing:
   875  
   876  			// signalRestartEstablishing restarts any establishment in progress. One
   877  			// use case for this is to prioritize a newly imported, exchanged server
   878  			// entry, which will be in the affinity position.
   879  			//
   880  			// It's possible for another connection to establish concurrent to signalling;
   881  			// since the overall goal remains to establish _any_ connection, we accept that
   882  			// in some cases the exchanged server entry may not get used.
   883  
   884  			if controller.isEstablishing {
   885  				controller.stopEstablishing()
   886  				controller.startEstablishing()
   887  			}
   888  
   889  		case failedTunnel := <-controller.failedTunnels:
   890  			NoticeWarning("tunnel failed: %s", failedTunnel.dialParams.ServerEntry.GetDiagnosticID())
   891  			controller.terminateTunnel(failedTunnel)
   892  
   893  			// Clear the reference to this tunnel before calling startEstablishing,
   894  			// which will invoke a garbage collection.
   895  			failedTunnel = nil
   896  
   897  			// Concurrency note: only this goroutine may call startEstablishing/stopEstablishing,
   898  			// which reference controller.isEstablishing.
   899  			controller.startEstablishing()
   900  
   901  		case connectedTunnel := <-controller.connectedTunnels:
   902  
   903  			// Tunnel establishment has two phases: connection and activation.
   904  			//
   905  			// Connection is run concurrently by the establishTunnelWorkers, to minimize
   906  			// delay when it's not yet known which server and protocol will be available
   907  			// and unblocked.
   908  			//
   909  			// Activation is run serially, here, to minimize the overhead of making a
   910  			// handshake request and starting the operateTunnel management worker for a
   911  			// tunnel which may be discarded.
   912  			//
   913  			// When the active tunnel will complete establishment, establishment is
   914  			// stopped before activation. This interrupts all connecting tunnels and
   915  			// garbage collects their memory. The purpose is to minimize memory
   916  			// pressure when the handshake request is made. In the unlikely case that the
   917  			// handshake fails, establishment is restarted.
   918  			//
   919  			// Any delays in stopEstablishing will delay the handshake for the last
   920  			// active tunnel.
   921  			//
   922  			// In the typical case of tunnelPoolSize of 1, only a single handshake is
   923  			// performed and the homepages notices file, when used, will not be modifed
   924  			// after the NoticeTunnels(1) [i.e., connected] until NoticeTunnels(0) [i.e.,
   925  			// disconnected]. For tunnelPoolSize > 1, serial handshakes only ensures that
   926  			// each set of emitted NoticeHomepages is contiguous.
   927  
   928  			active, outstanding := controller.numTunnels()
   929  
   930  			// discardTunnel will be true here when already fully established.
   931  
   932  			discardTunnel := (outstanding <= 0)
   933  			isFirstTunnel := (active == 0)
   934  			isLastTunnel := (outstanding == 1)
   935  
   936  			if !discardTunnel {
   937  
   938  				if isLastTunnel {
   939  					controller.stopEstablishing()
   940  				}
   941  
   942  				err := connectedTunnel.Activate(controller.runCtx, controller)
   943  
   944  				if err != nil {
   945  					NoticeWarning("failed to activate %s: %v",
   946  						connectedTunnel.dialParams.ServerEntry.GetDiagnosticID(),
   947  						errors.Trace(err))
   948  					discardTunnel = true
   949  				} else {
   950  					// It's unlikely that registerTunnel will fail, since only this goroutine
   951  					// calls registerTunnel -- and after checking numTunnels; so failure is not
   952  					// expected.
   953  					if !controller.registerTunnel(connectedTunnel) {
   954  						NoticeWarning("failed to register %s: %v",
   955  							connectedTunnel.dialParams.ServerEntry.GetDiagnosticID(),
   956  							errors.Trace(err))
   957  						discardTunnel = true
   958  					}
   959  				}
   960  
   961  				// May need to replace this tunnel
   962  				if isLastTunnel && discardTunnel {
   963  					controller.startEstablishing()
   964  				}
   965  
   966  			}
   967  
   968  			if discardTunnel {
   969  				controller.discardTunnel(connectedTunnel)
   970  
   971  				// Clear the reference to this discarded tunnel and immediately run
   972  				// a garbage collection to reclaim its memory.
   973  				connectedTunnel = nil
   974  				DoGarbageCollection()
   975  
   976  				// Skip the rest of this case
   977  				break
   978  			}
   979  
   980  			atomic.AddInt32(&controller.establishedTunnelsCount, 1)
   981  
   982  			NoticeActiveTunnel(
   983  				connectedTunnel.dialParams.ServerEntry.GetDiagnosticID(),
   984  				connectedTunnel.dialParams.TunnelProtocol,
   985  				connectedTunnel.dialParams.ServerEntry.SupportsSSHAPIRequests())
   986  
   987  			if isFirstTunnel {
   988  
   989  				// Signal a connected request on each 1st tunnel establishment. For
   990  				// multi-tunnels, the session is connected as long as at least one
   991  				// tunnel is established.
   992  				controller.signalConnectedReporter()
   993  
   994  				// If the handshake indicated that a new client version is available,
   995  				// trigger an upgrade download.
   996  				// Note: serverContext is nil when DisableApi is set
   997  				if connectedTunnel.serverContext != nil &&
   998  					connectedTunnel.serverContext.clientUpgradeVersion != "" {
   999  
  1000  					handshakeVersion := connectedTunnel.serverContext.clientUpgradeVersion
  1001  					select {
  1002  					case controller.signalDownloadUpgrade <- handshakeVersion:
  1003  					default:
  1004  					}
  1005  				}
  1006  			}
  1007  
  1008  			// Set the new tunnel as the transport for the packet tunnel. The packet tunnel
  1009  			// client remains up when reestablishing, but no packets are relayed while there
  1010  			// is no connected tunnel. UseTunnel will establish a new packet tunnel SSH
  1011  			// channel over the new SSH tunnel and configure the packet tunnel client to use
  1012  			// the new SSH channel as its transport.
  1013  			//
  1014  			// Note: as is, this logic is suboptimal for tunnelPoolSize > 1, as this would
  1015  			// continuously initialize new packet tunnel sessions for each established
  1016  			// server. For now, config validation requires tunnelPoolSize == 1 when
  1017  			// the packet tunnel is used.
  1018  
  1019  			if controller.packetTunnelTransport != nil {
  1020  				controller.packetTunnelTransport.UseTunnel(connectedTunnel)
  1021  			}
  1022  
  1023  			if controller.isFullyEstablished() {
  1024  				controller.stopEstablishing()
  1025  			}
  1026  
  1027  		case <-controller.runCtx.Done():
  1028  			break loop
  1029  		}
  1030  	}
  1031  
  1032  	// Stop running
  1033  
  1034  	controller.stopEstablishing()
  1035  	controller.terminateAllTunnels()
  1036  
  1037  	// Drain tunnel channels
  1038  	close(controller.connectedTunnels)
  1039  	for tunnel := range controller.connectedTunnels {
  1040  		controller.discardTunnel(tunnel)
  1041  	}
  1042  	close(controller.failedTunnels)
  1043  	for tunnel := range controller.failedTunnels {
  1044  		controller.discardTunnel(tunnel)
  1045  	}
  1046  
  1047  	NoticeInfo("exiting run tunnels")
  1048  }
  1049  
  1050  // SignalSeededNewSLOK implements the TunnelOwner interface. This function
  1051  // is called by Tunnel.operateTunnel when the tunnel has received a new,
  1052  // previously unknown SLOK from the server. The Controller triggers an OSL
  1053  // fetch, as the new SLOK may be sufficient to access new OSLs.
  1054  func (controller *Controller) SignalSeededNewSLOK() {
  1055  	select {
  1056  	case controller.signalFetchObfuscatedServerLists <- struct{}{}:
  1057  	default:
  1058  	}
  1059  }
  1060  
  1061  // SignalTunnelFailure implements the TunnelOwner interface. This function
  1062  // is called by Tunnel.operateTunnel when the tunnel has detected that it
  1063  // has failed. The Controller will signal runTunnels to create a new
  1064  // tunnel and/or remove the tunnel from the list of active tunnels.
  1065  func (controller *Controller) SignalTunnelFailure(tunnel *Tunnel) {
  1066  	// Don't block. Assumes the receiver has a buffer large enough for
  1067  	// the typical number of operated tunnels. In case there's no room,
  1068  	// terminate the tunnel (runTunnels won't get a signal in this case,
  1069  	// but the tunnel will be removed from the list of active tunnels).
  1070  	select {
  1071  	case controller.failedTunnels <- tunnel:
  1072  	default:
  1073  		controller.terminateTunnel(tunnel)
  1074  	}
  1075  }
  1076  
  1077  // discardTunnel disposes of a successful connection that is no longer required.
  1078  func (controller *Controller) discardTunnel(tunnel *Tunnel) {
  1079  	NoticeInfo("discard tunnel: %s", tunnel.dialParams.ServerEntry.GetDiagnosticID())
  1080  	// TODO: not calling PromoteServerEntry, since that would rank the
  1081  	// discarded tunnel before fully active tunnels. Can a discarded tunnel
  1082  	// be promoted (since it connects), but with lower rank than all active
  1083  	// tunnels?
  1084  	tunnel.Close(true)
  1085  }
  1086  
  1087  // registerTunnel adds the connected tunnel to the pool of active tunnels
  1088  // which are candidates for port forwarding. Returns true if the pool has an
  1089  // empty slot and false if the pool is full (caller should discard the tunnel).
  1090  func (controller *Controller) registerTunnel(tunnel *Tunnel) bool {
  1091  	controller.tunnelMutex.Lock()
  1092  	defer controller.tunnelMutex.Unlock()
  1093  	if len(controller.tunnels) >= controller.tunnelPoolSize {
  1094  		return false
  1095  	}
  1096  	// Perform a final check just in case we've established
  1097  	// a duplicate connection.
  1098  	for _, activeTunnel := range controller.tunnels {
  1099  		if activeTunnel.dialParams.ServerEntry.IpAddress ==
  1100  			tunnel.dialParams.ServerEntry.IpAddress {
  1101  
  1102  			NoticeWarning("duplicate tunnel: %s", tunnel.dialParams.ServerEntry.GetDiagnosticID())
  1103  			return false
  1104  		}
  1105  	}
  1106  	controller.establishedOnce = true
  1107  	controller.tunnels = append(controller.tunnels, tunnel)
  1108  	NoticeTunnels(len(controller.tunnels))
  1109  
  1110  	// Promote this successful tunnel to first rank so it's one
  1111  	// of the first candidates next time establish runs.
  1112  	// Connecting to a TargetServerEntry does not change the
  1113  	// ranking.
  1114  	if controller.config.TargetServerEntry == "" {
  1115  		PromoteServerEntry(controller.config, tunnel.dialParams.ServerEntry.IpAddress)
  1116  	}
  1117  
  1118  	return true
  1119  }
  1120  
  1121  // hasEstablishedOnce indicates if at least one active tunnel has
  1122  // been established up to this point. This is regardeless of how many
  1123  // tunnels are presently active.
  1124  func (controller *Controller) hasEstablishedOnce() bool {
  1125  	controller.tunnelMutex.Lock()
  1126  	defer controller.tunnelMutex.Unlock()
  1127  	return controller.establishedOnce
  1128  }
  1129  
  1130  // isFullyEstablished indicates if the pool of active tunnels is full.
  1131  func (controller *Controller) isFullyEstablished() bool {
  1132  	controller.tunnelMutex.Lock()
  1133  	defer controller.tunnelMutex.Unlock()
  1134  	return len(controller.tunnels) >= controller.tunnelPoolSize
  1135  }
  1136  
  1137  // numTunnels returns the number of active and outstanding tunnels.
  1138  // Oustanding is the number of tunnels required to fill the pool of
  1139  // active tunnels.
  1140  func (controller *Controller) numTunnels() (int, int) {
  1141  	controller.tunnelMutex.Lock()
  1142  	defer controller.tunnelMutex.Unlock()
  1143  	active := len(controller.tunnels)
  1144  	outstanding := controller.tunnelPoolSize - len(controller.tunnels)
  1145  	return active, outstanding
  1146  }
  1147  
  1148  // terminateTunnel removes a tunnel from the pool of active tunnels
  1149  // and closes the tunnel. The next-tunnel state used by getNextActiveTunnel
  1150  // is adjusted as required.
  1151  func (controller *Controller) terminateTunnel(tunnel *Tunnel) {
  1152  	controller.tunnelMutex.Lock()
  1153  	defer controller.tunnelMutex.Unlock()
  1154  	for index, activeTunnel := range controller.tunnels {
  1155  		if tunnel == activeTunnel {
  1156  			controller.tunnels = append(
  1157  				controller.tunnels[:index], controller.tunnels[index+1:]...)
  1158  			if controller.nextTunnel > index {
  1159  				controller.nextTunnel--
  1160  			}
  1161  			if controller.nextTunnel >= len(controller.tunnels) {
  1162  				controller.nextTunnel = 0
  1163  			}
  1164  			activeTunnel.Close(false)
  1165  			NoticeTunnels(len(controller.tunnels))
  1166  			break
  1167  		}
  1168  	}
  1169  }
  1170  
  1171  // terminateAllTunnels empties the tunnel pool, closing all active tunnels.
  1172  // This is used when shutting down the controller.
  1173  func (controller *Controller) terminateAllTunnels() {
  1174  	controller.tunnelMutex.Lock()
  1175  	defer controller.tunnelMutex.Unlock()
  1176  	// Closing all tunnels in parallel. In an orderly shutdown, each tunnel
  1177  	// may take a few seconds to send a final status request. We only want
  1178  	// to wait as long as the single slowest tunnel.
  1179  	closeWaitGroup := new(sync.WaitGroup)
  1180  	closeWaitGroup.Add(len(controller.tunnels))
  1181  	for _, activeTunnel := range controller.tunnels {
  1182  		tunnel := activeTunnel
  1183  		go func() {
  1184  			defer closeWaitGroup.Done()
  1185  			tunnel.Close(false)
  1186  		}()
  1187  	}
  1188  	closeWaitGroup.Wait()
  1189  	controller.tunnels = make([]*Tunnel, 0)
  1190  	controller.nextTunnel = 0
  1191  	NoticeTunnels(len(controller.tunnels))
  1192  }
  1193  
  1194  // getNextActiveTunnel returns the next tunnel from the pool of active
  1195  // tunnels. Currently, tunnel selection order is simple round-robin.
  1196  func (controller *Controller) getNextActiveTunnel() (tunnel *Tunnel) {
  1197  	controller.tunnelMutex.Lock()
  1198  	defer controller.tunnelMutex.Unlock()
  1199  	if len(controller.tunnels) == 0 {
  1200  		return nil
  1201  	}
  1202  	tunnel = controller.tunnels[controller.nextTunnel]
  1203  	controller.nextTunnel =
  1204  		(controller.nextTunnel + 1) % len(controller.tunnels)
  1205  	return tunnel
  1206  }
  1207  
  1208  // isActiveTunnelServerEntry is used to check if there's already
  1209  // an existing tunnel to a candidate server.
  1210  func (controller *Controller) isActiveTunnelServerEntry(
  1211  	serverEntry *protocol.ServerEntry) bool {
  1212  
  1213  	controller.tunnelMutex.Lock()
  1214  	defer controller.tunnelMutex.Unlock()
  1215  	for _, activeTunnel := range controller.tunnels {
  1216  		if activeTunnel.dialParams.ServerEntry.IpAddress == serverEntry.IpAddress {
  1217  			return true
  1218  		}
  1219  	}
  1220  	return false
  1221  }
  1222  
  1223  func (controller *Controller) setTunnelPoolSize(tunnelPoolSize int) {
  1224  	controller.tunnelMutex.Lock()
  1225  	defer controller.tunnelMutex.Unlock()
  1226  	if tunnelPoolSize < 1 {
  1227  		tunnelPoolSize = 1
  1228  	}
  1229  	if tunnelPoolSize > MAX_TUNNEL_POOL_SIZE {
  1230  		tunnelPoolSize = MAX_TUNNEL_POOL_SIZE
  1231  	}
  1232  	controller.tunnelPoolSize = tunnelPoolSize
  1233  }
  1234  
  1235  func (controller *Controller) getTunnelPoolSize() int {
  1236  	controller.tunnelMutex.Lock()
  1237  	defer controller.tunnelMutex.Unlock()
  1238  	return controller.tunnelPoolSize
  1239  }
  1240  
  1241  // Dial selects an active tunnel and establishes a port forward
  1242  // connection through the selected tunnel. Failure to connect is considered
  1243  // a port forward failure, for the purpose of monitoring tunnel health.
  1244  //
  1245  // When split tunnel mode is enabled, the connection may be untunneled,
  1246  // depending on GeoIP classification of the destination.
  1247  //
  1248  // downstreamConn is an optional parameter which specifies a connection to be
  1249  // explicitly closed when the dialed connection is closed. For instance, this
  1250  // is used to close downstreamConn App<->LocalProxy connections when the
  1251  // related LocalProxy<->SshPortForward connections close.
  1252  func (controller *Controller) Dial(
  1253  	remoteAddr string, downstreamConn net.Conn) (conn net.Conn, err error) {
  1254  
  1255  	tunnel := controller.getNextActiveTunnel()
  1256  	if tunnel == nil {
  1257  		return nil, errors.TraceNew("no active tunnels")
  1258  	}
  1259  
  1260  	if !tunnel.config.IsSplitTunnelEnabled() {
  1261  
  1262  		tunneledConn, splitTunnel, err := tunnel.DialTCPChannel(
  1263  			remoteAddr, false, downstreamConn)
  1264  		if err != nil {
  1265  			return nil, errors.Trace(err)
  1266  		}
  1267  
  1268  		if splitTunnel {
  1269  			return nil, errors.TraceNew(
  1270  				"unexpected split tunnel classification")
  1271  		}
  1272  
  1273  		return tunneledConn, nil
  1274  	}
  1275  
  1276  	// In split tunnel mode, TCP port forwards to destinations in the same
  1277  	// country as the client are untunneled.
  1278  	//
  1279  	// Split tunnel is implemented with assistence from the server to classify
  1280  	// destinations as being in the same country as the client. The server knows
  1281  	// the client's public IP GeoIP data, and, for clients with split tunnel mode
  1282  	// enabled, the server resolves the port forward destination address and
  1283  	// checks the destination IP GeoIP data.
  1284  	//
  1285  	// When the countries match, the server "rejects" the port forward with a
  1286  	// distinct response that indicates to the client that an untunneled port
  1287  	// foward should be established locally.
  1288  	//
  1289  	// The client maintains a classification cache that allows it to make
  1290  	// untunneled port forwards without requiring a round trip to the server.
  1291  	// Only destinations classified as untunneled are stored in the cache: a
  1292  	// destination classified as tunneled requires the same round trip as an
  1293  	// unknown destination.
  1294  	//
  1295  	// When the countries do not match, the server establishes a port forward, as
  1296  	// it does for all port forwards in non-split tunnel mode. There is no
  1297  	// additional round trip for tunneled port forwards.
  1298  
  1299  	splitTunnelHost, _, err := net.SplitHostPort(remoteAddr)
  1300  	if err != nil {
  1301  		return nil, errors.Trace(err)
  1302  	}
  1303  
  1304  	untunneledCache := controller.untunneledSplitTunnelClassifications
  1305  
  1306  	// If the destination hostname is in the untunneled split tunnel
  1307  	// classifications cache, skip the round trip to the server and do the
  1308  	// direct, untunneled dial immediately.
  1309  	_, cachedUntunneled := untunneledCache.Get(splitTunnelHost)
  1310  
  1311  	if !cachedUntunneled {
  1312  
  1313  		tunneledConn, splitTunnel, err := tunnel.DialTCPChannel(
  1314  			remoteAddr, false, downstreamConn)
  1315  		if err != nil {
  1316  			return nil, errors.Trace(err)
  1317  		}
  1318  
  1319  		if !splitTunnel {
  1320  
  1321  			// Clear any cached untunneled classification entry for this destination
  1322  			// hostname, as the server is now classifying it as tunneled.
  1323  			untunneledCache.Delete(splitTunnelHost)
  1324  
  1325  			return tunneledConn, nil
  1326  		}
  1327  
  1328  		// The server has indicated that the client should make a direct,
  1329  		// untunneled dial. Cache the classification to avoid this round trip in
  1330  		// the immediate future.
  1331  		untunneledCache.Add(splitTunnelHost, true, lrucache.DefaultExpiration)
  1332  	}
  1333  
  1334  	NoticeUntunneled(splitTunnelHost)
  1335  
  1336  	untunneledConn, err := controller.DirectDial(remoteAddr)
  1337  	if err != nil {
  1338  		return nil, errors.Trace(err)
  1339  	}
  1340  
  1341  	return untunneledConn, nil
  1342  }
  1343  
  1344  // DirectDial dials an untunneled TCP connection within the controller run context.
  1345  func (controller *Controller) DirectDial(remoteAddr string) (conn net.Conn, err error) {
  1346  	return DialTCP(controller.runCtx, remoteAddr, controller.untunneledDialConfig)
  1347  }
  1348  
  1349  // triggerFetches signals RSL, OSL, and upgrade download fetchers to begin, if
  1350  // not already running. triggerFetches is called when tunnel establishment
  1351  // fails to complete within a deadline and in other cases where local
  1352  // circumvention capabilities are lacking and we may require new server
  1353  // entries or client versions with new capabilities.
  1354  func (controller *Controller) triggerFetches() {
  1355  
  1356  	// Trigger a common remote server list fetch, since we may have failed
  1357  	// to connect with all known servers. Don't block sending signal, since
  1358  	// this signal may have already been sent.
  1359  	// Don't wait for fetch remote to succeed, since it may fail and
  1360  	// enter a retry loop and we're better off trying more known servers.
  1361  	// TODO: synchronize the fetch response, so it can be incorporated
  1362  	// into the server entry iterator as soon as available.
  1363  	select {
  1364  	case controller.signalFetchCommonRemoteServerList <- struct{}{}:
  1365  	default:
  1366  	}
  1367  
  1368  	// Trigger an OSL fetch in parallel. Both fetches are run in parallel
  1369  	// so that if one out of the common RLS and OSL set is large, it doesn't
  1370  	// doesn't entirely block fetching the other.
  1371  	select {
  1372  	case controller.signalFetchObfuscatedServerLists <- struct{}{}:
  1373  	default:
  1374  	}
  1375  
  1376  	// Trigger an out-of-band upgrade availability check and download.
  1377  	// Since we may have failed to connect, we may benefit from upgrading
  1378  	// to a new client version with new circumvention capabilities.
  1379  	select {
  1380  	case controller.signalDownloadUpgrade <- "":
  1381  	default:
  1382  	}
  1383  }
  1384  
// protocolSelectionConstraints bundles the limits applied when determining
// which tunnel protocols may be used with a candidate server entry. Its
// methods pass these fields to ServerEntry.GetSupportedProtocols.
//
// initialLimitTunnelProtocols is applied in place of limitTunnelProtocols
// while the connect tunnel count is below
// initialLimitTunnelProtocolsCandidateCount (see supportedProtocols).
// A replayCandidateCount of -1 disables the replay count limit (see
// canReplay).
type protocolSelectionConstraints struct {
	useUpstreamProxy                          bool
	initialLimitTunnelProtocols               protocol.TunnelProtocols
	initialLimitTunnelProtocolsCandidateCount int
	limitTunnelProtocols                      protocol.TunnelProtocols
	limitTunnelDialPortNumbers                protocol.TunnelProtocolPortLists
	limitQUICVersions                         protocol.QUICVersions
	replayCandidateCount                      int
}
  1394  
  1395  func (p *protocolSelectionConstraints) hasInitialProtocols() bool {
  1396  	return len(p.initialLimitTunnelProtocols) > 0 && p.initialLimitTunnelProtocolsCandidateCount > 0
  1397  }
  1398  
  1399  func (p *protocolSelectionConstraints) isInitialCandidate(
  1400  	excludeIntensive bool,
  1401  	serverEntry *protocol.ServerEntry) bool {
  1402  
  1403  	return p.hasInitialProtocols() &&
  1404  		len(serverEntry.GetSupportedProtocols(
  1405  			conditionallyEnabledComponents{},
  1406  			p.useUpstreamProxy,
  1407  			p.initialLimitTunnelProtocols,
  1408  			p.limitTunnelDialPortNumbers,
  1409  			p.limitQUICVersions,
  1410  			excludeIntensive)) > 0
  1411  }
  1412  
  1413  func (p *protocolSelectionConstraints) isCandidate(
  1414  	excludeIntensive bool,
  1415  	serverEntry *protocol.ServerEntry) bool {
  1416  
  1417  	return len(serverEntry.GetSupportedProtocols(
  1418  		conditionallyEnabledComponents{},
  1419  		p.useUpstreamProxy,
  1420  		p.limitTunnelProtocols,
  1421  		p.limitTunnelDialPortNumbers,
  1422  		p.limitQUICVersions,
  1423  		excludeIntensive)) > 0
  1424  }
  1425  
  1426  func (p *protocolSelectionConstraints) canReplay(
  1427  	connectTunnelCount int,
  1428  	excludeIntensive bool,
  1429  	serverEntry *protocol.ServerEntry,
  1430  	replayProtocol string) bool {
  1431  
  1432  	if p.replayCandidateCount != -1 && connectTunnelCount > p.replayCandidateCount {
  1433  		return false
  1434  	}
  1435  
  1436  	return common.Contains(
  1437  		p.supportedProtocols(connectTunnelCount, excludeIntensive, serverEntry),
  1438  		replayProtocol)
  1439  }
  1440  
  1441  func (p *protocolSelectionConstraints) supportedProtocols(
  1442  	connectTunnelCount int,
  1443  	excludeIntensive bool,
  1444  	serverEntry *protocol.ServerEntry) []string {
  1445  
  1446  	limitTunnelProtocols := p.limitTunnelProtocols
  1447  
  1448  	if len(p.initialLimitTunnelProtocols) > 0 &&
  1449  		p.initialLimitTunnelProtocolsCandidateCount > connectTunnelCount {
  1450  
  1451  		limitTunnelProtocols = p.initialLimitTunnelProtocols
  1452  	}
  1453  
  1454  	return serverEntry.GetSupportedProtocols(
  1455  		conditionallyEnabledComponents{},
  1456  		p.useUpstreamProxy,
  1457  		limitTunnelProtocols,
  1458  		p.limitTunnelDialPortNumbers,
  1459  		p.limitQUICVersions,
  1460  		excludeIntensive)
  1461  }
  1462  
  1463  func (p *protocolSelectionConstraints) selectProtocol(
  1464  	connectTunnelCount int,
  1465  	excludeIntensive bool,
  1466  	serverEntry *protocol.ServerEntry) (string, bool) {
  1467  
  1468  	candidateProtocols := p.supportedProtocols(connectTunnelCount, excludeIntensive, serverEntry)
  1469  
  1470  	if len(candidateProtocols) == 0 {
  1471  		return "", false
  1472  	}
  1473  
  1474  	// Pick at random from the supported protocols. This ensures that we'll
  1475  	// eventually try all possible protocols. Depending on network
  1476  	// configuration, it may be the case that some protocol is only available
  1477  	// through multi-capability servers, and a simpler ranked preference of
  1478  	// protocols could lead to that protocol never being selected.
  1479  
  1480  	index := prng.Intn(len(candidateProtocols))
  1481  
  1482  	return candidateProtocols[index], true
  1483  
  1484  }
  1485  
// candidateServerEntry is the item type carried by the controller's
// candidateServerEntries channel: a server entry to attempt a tunnel
// connection to, together with per-candidate establishment state.
//
// isServerAffinityCandidate marks the server affinity candidate described in
// startEstablishing. adjustedEstablishStartTime is presumably the establish
// start time used for this candidate's duration reporting -- TODO confirm
// against the candidate generator.
type candidateServerEntry struct {
	serverEntry                *protocol.ServerEntry
	isServerAffinityCandidate  bool
	adjustedEstablishStartTime time.Time
}
  1491  
  1492  // startEstablishing creates a pool of worker goroutines which will
  1493  // attempt to establish tunnels to candidate servers. The candidates
  1494  // are generated by another goroutine.
  1495  func (controller *Controller) startEstablishing() {
  1496  	if controller.isEstablishing {
  1497  		return
  1498  	}
  1499  	NoticeInfo("start establishing")
  1500  
  1501  	// establishStartTime is used to calculate and report the client's tunnel
  1502  	// establishment duration. Establishment duration should include all
  1503  	// initialization in launchEstablishing and establishCandidateGenerator,
  1504  	// including any potentially long-running datastore iterations.
  1505  	establishStartTime := time.Now()
  1506  
  1507  	controller.concurrentEstablishTunnelsMutex.Lock()
  1508  	controller.establishConnectTunnelCount = 0
  1509  	controller.concurrentEstablishTunnels = 0
  1510  	controller.concurrentIntensiveEstablishTunnels = 0
  1511  	controller.peakConcurrentEstablishTunnels = 0
  1512  	controller.peakConcurrentIntensiveEstablishTunnels = 0
  1513  	controller.concurrentEstablishTunnelsMutex.Unlock()
  1514  
  1515  	DoGarbageCollection()
  1516  	emitMemoryMetrics()
  1517  
  1518  	// The establish context cancelFunc, controller.stopEstablish, is called in
  1519  	// controller.stopEstablishing.
  1520  
  1521  	controller.isEstablishing = true
  1522  	controller.establishStartTime = establishStartTime
  1523  	controller.establishCtx, controller.stopEstablish = context.WithCancel(controller.runCtx)
  1524  	controller.establishWaitGroup = new(sync.WaitGroup)
  1525  	controller.candidateServerEntries = make(chan *candidateServerEntry)
  1526  
  1527  	// The server affinity mechanism attempts to favor the previously
  1528  	// used server when reconnecting. This is beneficial for user
  1529  	// applications which expect consistency in user IP address (for
  1530  	// example, a web site which prompts for additional user
  1531  	// authentication when the IP address changes).
  1532  	//
  1533  	// Only the very first server, as determined by
  1534  	// datastore.PromoteServerEntry(), is the server affinity candidate.
  1535  	// Concurrent connections attempts to many servers are launched
  1536  	// without delay, in case the affinity server connection fails.
  1537  	// While the affinity server connection is outstanding, when any
  1538  	// other connection is established, there is a short grace period
  1539  	// delay before delivering the established tunnel; this allows some
  1540  	// time for the affinity server connection to succeed first.
  1541  	// When the affinity server connection fails, any other established
  1542  	// tunnel is registered without delay.
  1543  	//
  1544  	// Note: the establishTunnelWorker that receives the affinity
  1545  	// candidate is solely resonsible for closing
  1546  	// controller.serverAffinityDoneBroadcast.
  1547  	controller.serverAffinityDoneBroadcast = make(chan struct{})
  1548  
  1549  	controller.establishWaitGroup.Add(1)
  1550  	go controller.launchEstablishing()
  1551  }
  1552  
  1553  func (controller *Controller) launchEstablishing() {
  1554  
  1555  	defer controller.establishWaitGroup.Done()
  1556  
  1557  	// Before starting the establish tunnel workers, get and apply
  1558  	// tactics, launching a tactics request if required.
  1559  	//
  1560  	// Wait only TacticsWaitPeriod for the tactics request to complete (or
  1561  	// fail) before proceeding with tunnel establishment, in case the tactics
  1562  	// request is blocked or takes very long to complete.
  1563  	//
  1564  	// An in-flight tactics request uses meek in round tripper mode, which
  1565  	// uses less resources than meek tunnel relay mode. For this reason, the
  1566  	// tactics request is not counted in concurrentIntensiveEstablishTunnels.
  1567  	//
  1568  	// TODO: HTTP/2 uses significantly more memory, so perhaps
  1569  	// concurrentIntensiveEstablishTunnels should be counted in that case.
  1570  	//
  1571  	// Any in-flight tactics request or pending retry will be
  1572  	// canceled when establishment is stopped.
  1573  
  1574  	if !controller.config.DisableTactics {
  1575  
  1576  		timeout := controller.config.GetParameters().Get().Duration(
  1577  			parameters.TacticsWaitPeriod)
  1578  
  1579  		tacticsDone := make(chan struct{})
  1580  		tacticsWaitPeriod := time.NewTimer(timeout)
  1581  		defer tacticsWaitPeriod.Stop()
  1582  
  1583  		controller.establishWaitGroup.Add(1)
  1584  		go func() {
  1585  			defer controller.establishWaitGroup.Done()
  1586  			defer close(tacticsDone)
  1587  			GetTactics(controller.establishCtx, controller.config)
  1588  		}()
  1589  
  1590  		select {
  1591  		case <-tacticsDone:
  1592  		case <-tacticsWaitPeriod.C:
  1593  		}
  1594  
  1595  		tacticsWaitPeriod.Stop()
  1596  
  1597  		if controller.isStopEstablishing() {
  1598  			// This check isn't strictly required but avoids the overhead of launching
  1599  			// workers if establishment stopped while awaiting a tactics request.
  1600  			return
  1601  		}
  1602  	}
  1603  
  1604  	// Initial- and LimitTunnelProtocols may be set by tactics.
  1605  	//
  1606  	// These protocol limits are fixed once per establishment, for
  1607  	// consistent application of related probabilities (applied by
  1608  	// ParametersAccessor.TunnelProtocols). The
  1609  	// establishLimitTunnelProtocolsState field must be read-only after this
  1610  	// point, allowing concurrent reads by establishment workers.
  1611  
  1612  	p := controller.config.GetParameters().Get()
  1613  
  1614  	controller.protocolSelectionConstraints = &protocolSelectionConstraints{
  1615  		useUpstreamProxy:                          controller.config.UseUpstreamProxy(),
  1616  		initialLimitTunnelProtocols:               p.TunnelProtocols(parameters.InitialLimitTunnelProtocols),
  1617  		initialLimitTunnelProtocolsCandidateCount: p.Int(parameters.InitialLimitTunnelProtocolsCandidateCount),
  1618  		limitTunnelProtocols:                      p.TunnelProtocols(parameters.LimitTunnelProtocols),
  1619  
  1620  		limitTunnelDialPortNumbers: protocol.TunnelProtocolPortLists(
  1621  			p.TunnelProtocolPortLists(parameters.LimitTunnelDialPortNumbers)),
  1622  
  1623  		replayCandidateCount: p.Int(parameters.ReplayCandidateCount),
  1624  	}
  1625  
  1626  	// ConnectionWorkerPoolSize may be set by tactics.
  1627  
  1628  	workerPoolSize := p.Int(parameters.ConnectionWorkerPoolSize)
  1629  
  1630  	// When TargetServerEntry is used, override any worker pool size config or
  1631  	// tactic parameter and use a pool size of 1. The typical use case for
  1632  	// TargetServerEntry is to test a specific server with a single connection
  1633  	// attempt. Furthermore, too many concurrent attempts to connect to the
  1634  	// same server will trigger rate limiting.
  1635  	if controller.config.TargetServerEntry != "" {
  1636  		workerPoolSize = 1
  1637  	}
  1638  
  1639  	// TunnelPoolSize may be set by tactics, subject to local constraints. A pool
  1640  	// size of one is forced in packet tunnel mode or when using a
  1641  	// TargetServerEntry. The tunnel pool size is reduced when there are
  1642  	// insufficent known server entries, within the set region and protocol
  1643  	// constraints, to satisfy the target.
  1644  	//
  1645  	// Limitations, to simplify concurrent access to shared state: a ceiling of
  1646  	// MAX_TUNNEL_POOL_SIZE is enforced by setTunnelPoolSize; the tunnel pool
  1647  	// size target is not re-adjusted after an API handshake, even though the
  1648  	// handshake response may deliver new tactics, or prune server entries which
  1649  	// were potential candidates; nor is the target re-adjusted after fetching
  1650  	// new server entries during this establishment.
  1651  
  1652  	tunnelPoolSize := p.Int(parameters.TunnelPoolSize)
  1653  	if controller.config.PacketTunnelTunFileDescriptor > 0 ||
  1654  		controller.config.TargetServerEntry != "" {
  1655  		tunnelPoolSize = 1
  1656  	}
  1657  
  1658  	p.Close()
  1659  
  1660  	// Trigger CandidateServers and AvailableEgressRegions notices. By default,
  1661  	// this is an asynchronous operation, as the underlying full server entry
  1662  	// list enumeration may be a slow operation. In certain cases, where
  1663  	// candidate counts are required up front, await the result before
  1664  	// proceeding.
  1665  
  1666  	awaitResponse := tunnelPoolSize > 1 ||
  1667  		controller.protocolSelectionConstraints.initialLimitTunnelProtocolsCandidateCount > 0
  1668  
  1669  	// AvailableEgressRegions: after a fresh install, the outer client may not
  1670  	// have a list of regions to display; and LimitTunnelProtocols may reduce the
  1671  	// number of available regions.
  1672  	//
  1673  	// When the outer client receives NoticeAvailableEgressRegions and the
  1674  	// configured EgressRegion is not included in the region list, the outer
  1675  	// client _should_ stop tunnel-core and prompt the user to change the region
  1676  	// selection, as there are insufficient servers/capabilities to establish a
  1677  	// tunnel in the selected region.
  1678  	//
  1679  	// This report is delayed until after tactics are likely to be applied,
  1680  	// above; this avoids a ReportAvailableRegions reporting too many regions,
  1681  	// followed shortly by a ReportAvailableRegions reporting fewer regions. That
  1682  	// sequence could cause issues in the outer client UI.
  1683  	//
  1684  	// The reported regions are limited by protocolSelectionConstraints; in the
  1685  	// case where an initial limit is in place, only regions available for the
  1686  	// initial limit are reported. The initial phase will not complete if
  1687  	// EgressRegion is set such that there are no server entries with the
  1688  	// necessary protocol capabilities (either locally or from a remote server
  1689  	// list fetch).
  1690  
  1691  	// Concurrency note: controller.protocolSelectionConstraints may be
  1692  	// overwritten before serverEntriesReporter reads it, and so cannot be
  1693  	// accessed directly by serverEntriesReporter.
  1694  	reportRequest := &serverEntriesReportRequest{
  1695  		constraints: controller.protocolSelectionConstraints,
  1696  	}
  1697  
  1698  	if awaitResponse {
  1699  		// Buffer size of 1 ensures the sender, serverEntryReporter, won't block on
  1700  		// sending the response in the case where launchEstablishing exits due to
  1701  		// stopping establishment.
  1702  		reportRequest.awaitResponse = make(chan *serverEntriesReportResponse, 1)
  1703  	}
  1704  
  1705  	controller.signalServerEntriesReporter(reportRequest)
  1706  
  1707  	if awaitResponse {
  1708  
  1709  		var reportResponse *serverEntriesReportResponse
  1710  		select {
  1711  		case reportResponse = <-reportRequest.awaitResponse:
  1712  		case <-controller.establishCtx.Done():
  1713  			// The sender may be gone when shutting down, or may not send until after
  1714  			// stopping establishment.
  1715  			return
  1716  		}
  1717  		if reportResponse.err != nil {
  1718  			NoticeError("failed to report server entries: %v",
  1719  				errors.Trace(reportResponse.err))
  1720  			controller.SignalComponentFailure()
  1721  			return
  1722  		}
  1723  
  1724  		// Make adjustments based on candidate counts.
  1725  
  1726  		if tunnelPoolSize > 1 {
  1727  			// Initial canidate count is ignored as count candidates will eventually
  1728  			// become available.
  1729  			if reportResponse.candidates < tunnelPoolSize {
  1730  				tunnelPoolSize = reportResponse.candidates
  1731  			}
  1732  			if tunnelPoolSize < 1 {
  1733  				tunnelPoolSize = 1
  1734  			}
  1735  		}
  1736  		controller.setTunnelPoolSize(tunnelPoolSize)
  1737  
  1738  		// If InitialLimitTunnelProtocols is configured but cannot be satisfied,
  1739  		// skip the initial phase in this establishment. This avoids spinning,
  1740  		// unable to connect, in this case. InitialLimitTunnelProtocols is
  1741  		// intended to prioritize certain protocols, but not strictly select them.
  1742  		//
  1743  		// The candidate count check ignores egress region selection. When an egress
  1744  		// region is selected, it's the responsibility of the outer client to react
  1745  		// to the following ReportAvailableRegions output and clear the user's
  1746  		// selected region to prevent spinning, unable to connect. The initial phase
  1747  		// is skipped only when InitialLimitTunnelProtocols cannot be satisfied
  1748  		// _regardless_ of region selection.
  1749  		//
  1750  		// We presume that, in practise, most clients will have embedded server
  1751  		// entries with capabilities for most protocols; and that clients will
  1752  		// often perform RSL checks. So clients should most often have the
  1753  		// necessary capabilities to satisfy InitialLimitTunnelProtocols. When
  1754  		// this check fails, RSL/OSL/upgrade checks are triggered in order to gain
  1755  		// new capabilities.
  1756  		//
  1757  		// LimitTunnelProtocols remains a hard limit, as using prohibited
  1758  		// protocols may have some bad effect, such as a firewall blocking all
  1759  		// traffic from a host.
  1760  
  1761  		if controller.protocolSelectionConstraints.initialLimitTunnelProtocolsCandidateCount > 0 {
  1762  
  1763  			if reportResponse.initialCandidatesAnyEgressRegion == 0 {
  1764  				NoticeWarning("skipping initial limit tunnel protocols")
  1765  				controller.protocolSelectionConstraints.initialLimitTunnelProtocolsCandidateCount = 0
  1766  
  1767  				// Since we were unable to satisfy the InitialLimitTunnelProtocols
  1768  				// tactic, trigger RSL, OSL, and upgrade fetches to potentially
  1769  				// gain new capabilities.
  1770  				controller.triggerFetches()
  1771  			}
  1772  		}
  1773  	}
  1774  
  1775  	for i := 0; i < workerPoolSize; i++ {
  1776  		controller.establishWaitGroup.Add(1)
  1777  		go controller.establishTunnelWorker()
  1778  	}
  1779  
  1780  	controller.establishWaitGroup.Add(1)
  1781  	go controller.establishCandidateGenerator()
  1782  }
  1783  
  1784  // stopEstablishing signals the establish goroutines to stop and waits
  1785  // for the group to halt.
  1786  func (controller *Controller) stopEstablishing() {
  1787  	if !controller.isEstablishing {
  1788  		return
  1789  	}
  1790  	NoticeInfo("stop establishing")
  1791  	controller.stopEstablish()
  1792  	// Note: establishCandidateGenerator closes controller.candidateServerEntries
  1793  	// (as it may be sending to that channel).
  1794  	controller.establishWaitGroup.Wait()
  1795  	NoticeInfo("stopped establishing")
  1796  
  1797  	controller.isEstablishing = false
  1798  	controller.establishStartTime = time.Time{}
  1799  	controller.establishCtx = nil
  1800  	controller.stopEstablish = nil
  1801  	controller.establishWaitGroup = nil
  1802  	controller.candidateServerEntries = nil
  1803  	controller.serverAffinityDoneBroadcast = nil
  1804  
  1805  	controller.concurrentEstablishTunnelsMutex.Lock()
  1806  	peakConcurrent := controller.peakConcurrentEstablishTunnels
  1807  	peakConcurrentIntensive := controller.peakConcurrentIntensiveEstablishTunnels
  1808  	controller.establishConnectTunnelCount = 0
  1809  	controller.concurrentEstablishTunnels = 0
  1810  	controller.concurrentIntensiveEstablishTunnels = 0
  1811  	controller.peakConcurrentEstablishTunnels = 0
  1812  	controller.peakConcurrentIntensiveEstablishTunnels = 0
  1813  	controller.concurrentEstablishTunnelsMutex.Unlock()
  1814  	NoticeInfo("peak concurrent establish tunnels: %d", peakConcurrent)
  1815  	NoticeInfo("peak concurrent resource intensive establish tunnels: %d", peakConcurrentIntensive)
  1816  
  1817  	emitMemoryMetrics()
  1818  	DoGarbageCollection()
  1819  
  1820  	// Record datastore metrics after establishment, the phase which generates
  1821  	// the bulk of all datastore transactions: iterating over server entries,
  1822  	// storing new server entries, etc.
  1823  	emitDatastoreMetrics()
  1824  
  1825  	// Similarly, establishment generates the bulk of domain resolves.
  1826  	emitDNSMetrics(controller.resolver)
  1827  }
  1828  
  1829  // establishCandidateGenerator populates the candidate queue with server entries
  1830  // from the data store. Server entries are iterated in rank order, so that promoted
  1831  // servers with higher rank are priority candidates.
  1832  func (controller *Controller) establishCandidateGenerator() {
  1833  	defer controller.establishWaitGroup.Done()
  1834  	defer close(controller.candidateServerEntries)
  1835  
  1836  	// networkWaitDuration is the elapsed time spent waiting
  1837  	// for network connectivity. This duration will be excluded
  1838  	// from reported tunnel establishment duration.
  1839  	var totalNetworkWaitDuration time.Duration
  1840  
  1841  	applyServerAffinity, iterator, err := NewServerEntryIterator(controller.config)
  1842  	if err != nil {
  1843  		NoticeError("failed to iterate over candidates: %v", errors.Trace(err))
  1844  		controller.SignalComponentFailure()
  1845  		return
  1846  	}
  1847  	defer iterator.Close()
  1848  
  1849  	// TODO: reconcile server affinity scheme with multi-tunnel mode
  1850  	if controller.getTunnelPoolSize() > 1 {
  1851  		applyServerAffinity = false
  1852  	}
  1853  
  1854  	isServerAffinityCandidate := true
  1855  	if !applyServerAffinity {
  1856  		isServerAffinityCandidate = false
  1857  		close(controller.serverAffinityDoneBroadcast)
  1858  	}
  1859  
  1860  loop:
  1861  	// Repeat until stopped
  1862  	for {
  1863  
  1864  		// A "round" consists of a new shuffle of the server entries and attempted
  1865  		// connections up to the end of the server entry iterator, or
  1866  		// parameters.EstablishTunnelWorkTime elapsed. Time spent waiting for
  1867  		// network connectivity is excluded from round elapsed time.
  1868  		//
  1869  		// After a round, if parameters.EstablishTunnelWorkTime has elapsed in total
  1870  		// with no tunnel established, remote server list and upgrade checks are
  1871  		// triggered.
  1872  		//
  1873  		// A complete server entry iteration does not trigger fetches since it's
  1874  		// possible to have fewer than parameters.ConnectionWorkerPoolSize
  1875  		// candidates, in which case rounds end instantly due to the complete server
  1876  		// entry iteration. An exception is made for an empty server entry iterator;
  1877  		// in that case fetches may be triggered immediately.
  1878  		//
  1879  		// The number of server candidates may change during this loop, due to
  1880  		// remote server list fetches. Due to the performance impact, we will not
  1881  		// trigger additional, informational CandidateServer notices while in the
  1882  		// establishing loop. Clients typically re-establish often enough that we
  1883  		// will see the effect of the remote server list fetch in diagnostics.
  1884  
  1885  		roundStartTime := time.Now()
  1886  		var roundNetworkWaitDuration time.Duration
  1887  
  1888  		workTime := controller.config.GetParameters().Get().Duration(
  1889  			parameters.EstablishTunnelWorkTime)
  1890  
  1891  		candidateServerEntryCount := 0
  1892  
  1893  		// Send each iterator server entry to the establish workers
  1894  		for {
  1895  
  1896  			networkWaitStartTime := time.Now()
  1897  			if !WaitForNetworkConnectivity(
  1898  				controller.establishCtx,
  1899  				controller.config.NetworkConnectivityChecker) {
  1900  				break loop
  1901  			}
  1902  			networkWaitDuration := time.Since(networkWaitStartTime)
  1903  			roundNetworkWaitDuration += networkWaitDuration
  1904  			totalNetworkWaitDuration += networkWaitDuration
  1905  
  1906  			serverEntry, err := iterator.Next()
  1907  			if err != nil {
  1908  				NoticeError("failed to get next candidate: %v", errors.Trace(err))
  1909  				controller.SignalComponentFailure()
  1910  				break loop
  1911  			}
  1912  			if serverEntry == nil {
  1913  				// Completed this iteration
  1914  				NoticeInfo("completed server entry iteration")
  1915  				break
  1916  			}
  1917  
  1918  			if controller.config.TargetApiProtocol == protocol.PSIPHON_SSH_API_PROTOCOL &&
  1919  				!serverEntry.SupportsSSHAPIRequests() {
  1920  				continue
  1921  			}
  1922  
  1923  			candidateServerEntryCount += 1
  1924  
  1925  			// adjustedEstablishStartTime is establishStartTime shifted
  1926  			// to exclude time spent waiting for network connectivity.
  1927  			adjustedEstablishStartTime := controller.establishStartTime.Add(
  1928  				totalNetworkWaitDuration)
  1929  
  1930  			candidate := &candidateServerEntry{
  1931  				serverEntry:                serverEntry,
  1932  				isServerAffinityCandidate:  isServerAffinityCandidate,
  1933  				adjustedEstablishStartTime: adjustedEstablishStartTime,
  1934  			}
  1935  
  1936  			wasServerAffinityCandidate := isServerAffinityCandidate
  1937  
  1938  			// Note: there must be only one server affinity candidate, as it
  1939  			// closes the serverAffinityDoneBroadcast channel.
  1940  			isServerAffinityCandidate = false
  1941  
  1942  			// TODO: here we could generate multiple candidates from the
  1943  			// server entry when there are many MeekFrontingAddresses.
  1944  
  1945  			select {
  1946  			case controller.candidateServerEntries <- candidate:
  1947  			case <-controller.establishCtx.Done():
  1948  				break loop
  1949  			}
  1950  
  1951  			if time.Since(roundStartTime)-roundNetworkWaitDuration > workTime {
  1952  				// Start over, after a brief pause, with a new shuffle of the server
  1953  				// entries, and potentially some newly fetched server entries.
  1954  				break
  1955  			}
  1956  
  1957  			if wasServerAffinityCandidate {
  1958  
  1959  				// Don't start the next candidate until either the server affinity
  1960  				// candidate has completed (success or failure) or is still working
  1961  				// and the grace period has elapsed.
  1962  
  1963  				gracePeriod := controller.config.GetParameters().Get().Duration(
  1964  					parameters.EstablishTunnelServerAffinityGracePeriod)
  1965  
  1966  				if gracePeriod > 0 {
  1967  					timer := time.NewTimer(gracePeriod)
  1968  					select {
  1969  					case <-timer.C:
  1970  					case <-controller.serverAffinityDoneBroadcast:
  1971  					case <-controller.establishCtx.Done():
  1972  						timer.Stop()
  1973  						break loop
  1974  					}
  1975  					timer.Stop()
  1976  				}
  1977  			}
  1978  		}
  1979  
  1980  		// Free up resources now, but don't reset until after the pause.
  1981  		iterator.Close()
  1982  
  1983  		// Trigger RSL, OSL, and upgrade checks after failing to establish a
  1984  		// tunnel within parameters.EstablishTunnelWorkTime, or if there are
  1985  		// no server entries present.
  1986  		//
  1987  		// While the trigger is made after each round,
  1988  		// parameter.FetchRemoteServerListStalePeriod will limit the actual
  1989  		// frequency of fetches. Continuing to trigger allows for very long running
  1990  		// establishments to perhaps eventually succeed.
  1991  		//
  1992  		// No fetches are triggered when TargetServerEntry is specified. In that
  1993  		// case, we're only trying to connect to a specific server entry.
  1994  
  1995  		if (candidateServerEntryCount == 0 ||
  1996  			time.Since(controller.establishStartTime)-totalNetworkWaitDuration > workTime) &&
  1997  			controller.config.TargetServerEntry == "" {
  1998  
  1999  			controller.triggerFetches()
  2000  		}
  2001  
  2002  		// After a complete iteration of candidate servers, pause before iterating again.
  2003  		// This helps avoid some busy wait loop conditions, and also allows some time for
  2004  		// network conditions to change. Also allows for fetch remote to complete,
  2005  		// in typical conditions (it isn't strictly necessary to wait for this, there will
  2006  		// be more rounds if required).
  2007  
  2008  		p := controller.config.GetParameters().Get()
  2009  		timeout := prng.JitterDuration(
  2010  			p.Duration(parameters.EstablishTunnelPausePeriod),
  2011  			p.Float(parameters.EstablishTunnelPausePeriodJitter))
  2012  		p.Close()
  2013  
  2014  		timer := time.NewTimer(timeout)
  2015  		select {
  2016  		case <-timer.C:
  2017  			// Retry iterating
  2018  		case <-controller.establishCtx.Done():
  2019  			timer.Stop()
  2020  			break loop
  2021  		}
  2022  		timer.Stop()
  2023  
  2024  		iterator.Reset()
  2025  	}
  2026  }
  2027  
  2028  // establishTunnelWorker pulls candidates from the candidate queue, establishes
  2029  // a connection to the tunnel server, and delivers the connected tunnel to a channel.
  2030  func (controller *Controller) establishTunnelWorker() {
  2031  	defer controller.establishWaitGroup.Done()
  2032  loop:
  2033  	for candidateServerEntry := range controller.candidateServerEntries {
  2034  
  2035  		// Note: don't receive from candidateServerEntries and isStopEstablishing
  2036  		// in the same select, since we want to prioritize receiving the stop signal
  2037  		if controller.isStopEstablishing() {
  2038  			break loop
  2039  		}
  2040  
  2041  		// There may already be a tunnel to this candidate. If so, skip it.
  2042  		if controller.isActiveTunnelServerEntry(candidateServerEntry.serverEntry) {
  2043  			continue
  2044  		}
  2045  
  2046  		// TODO: we allow multiple, concurrent workers to attempt to connect to the
  2047  		// same server. This is not wasteful if the server supports several
  2048  		// different protocols, some of which may be blocked while others are not
  2049  		// blocked. Limiting protocols with [Initial]LimitTunnelProtocols may make
  2050  		// these multiple attempts redundent. Also, replay should be used only by
  2051  		// the first attempt.
  2052  
  2053  		// upstreamProxyErrorCallback will post NoticeUpstreamProxyError when the
  2054  		// tunnel dial fails due to an upstream proxy error. As the upstream proxy
  2055  		// is user configured, the error message may need to be relayed to the user.
  2056  
  2057  		// As the callback may be invoked after establishment is over (e.g., if an
  2058  		// initial dial isn't fully shutdown when ConnectTunnel returns; or a meek
  2059  		// underlying TCP connection re-dial) don't access these variables
  2060  		// directly.
  2061  		callbackCandidateServerEntry := candidateServerEntry
  2062  		callbackEstablishCtx := controller.establishCtx
  2063  
  2064  		upstreamProxyErrorCallback := func(err error) {
  2065  
  2066  			// Do not post the notice when overall establishment context is canceled or
  2067  			// timed-out: the upstream proxy connection error is likely a result of the
  2068  			// cancellation, and not a condition to be fixed by the user. In the case
  2069  			// of meek underlying TCP connection re-dials, this condition will always
  2070  			// be true; however in this case the initial dial succeeded with the
  2071  			// current upstream proxy settings, so any upstream proxy error is
  2072  			// transient.
  2073  			if callbackEstablishCtx.Err() != nil {
  2074  				return
  2075  			}
  2076  
  2077  			// Another class of non-fatal upstream proxy error arises from proxies
  2078  			// which limit permitted proxied ports. In this case, some tunnels may fail
  2079  			// due to dial port, while others may eventually succeed. To avoid this
  2080  			// class of errors, delay posting the notice. If the upstream proxy works,
  2081  			// _some_ tunnel should connect. If the upstream proxy configuration is
  2082  			// broken, the error should persist and eventually get posted.
  2083  
  2084  			p := controller.config.GetParameters().Get()
  2085  			workerPoolSize := p.Int(parameters.ConnectionWorkerPoolSize)
  2086  			minWaitDuration := p.Duration(parameters.UpstreamProxyErrorMinWaitDuration)
  2087  			maxWaitDuration := p.Duration(parameters.UpstreamProxyErrorMaxWaitDuration)
  2088  			p.Close()
  2089  
  2090  			controller.concurrentEstablishTunnelsMutex.Lock()
  2091  			establishConnectTunnelCount := controller.establishConnectTunnelCount
  2092  			controller.concurrentEstablishTunnelsMutex.Unlock()
  2093  
  2094  			// Delay UpstreamProxyErrorMinWaitDuration (excluding time spent waiting
  2095  			// for network connectivity) and then until either
  2096  			// UpstreamProxyErrorMaxWaitDuration has elapsed or, to post sooner if many
  2097  			// candidates are failing, at least workerPoolSize tunnel connection
  2098  			// attempts have completed. We infer that at least workerPoolSize
  2099  			// candidates have completed by checking that at least 2*workerPoolSize
  2100  			// candidates have started.
  2101  
  2102  			elapsedTime := time.Since(
  2103  				callbackCandidateServerEntry.adjustedEstablishStartTime)
  2104  
  2105  			if elapsedTime < minWaitDuration ||
  2106  				(elapsedTime < maxWaitDuration &&
  2107  					establishConnectTunnelCount < 2*workerPoolSize) {
  2108  				return
  2109  			}
  2110  
  2111  			NoticeUpstreamProxyError(err)
  2112  		}
  2113  
  2114  		// Select the tunnel protocol. The selection will be made at random
  2115  		// from protocols supported by the server entry, optionally limited by
  2116  		// LimitTunnelProtocols.
  2117  		//
  2118  		// When limiting concurrent resource intensive protocol connection
  2119  		// workers, and at the limit, do not select resource intensive
  2120  		// protocols since otherwise the candidate must be skipped.
  2121  		//
  2122  		// If at the limit and unabled to select a non-intensive protocol,
  2123  		// skip the candidate entirely and move on to the next. Since
  2124  		// candidates are shuffled it's likely that the next candidate is not
  2125  		// intensive. In this case, a StaggerConnectionWorkersMilliseconds
  2126  		// delay may still be incurred.
  2127  
  2128  		limitIntensiveConnectionWorkers := controller.config.GetParameters().Get().Int(
  2129  			parameters.LimitIntensiveConnectionWorkers)
  2130  
  2131  		controller.concurrentEstablishTunnelsMutex.Lock()
  2132  
  2133  		excludeIntensive := false
  2134  		if limitIntensiveConnectionWorkers > 0 &&
  2135  			controller.concurrentIntensiveEstablishTunnels >= limitIntensiveConnectionWorkers {
  2136  			excludeIntensive = true
  2137  		}
  2138  
  2139  		canReplay := func(serverEntry *protocol.ServerEntry, replayProtocol string) bool {
  2140  			return controller.protocolSelectionConstraints.canReplay(
  2141  				controller.establishConnectTunnelCount,
  2142  				excludeIntensive,
  2143  				serverEntry,
  2144  				replayProtocol)
  2145  		}
  2146  
  2147  		selectProtocol := func(serverEntry *protocol.ServerEntry) (string, bool) {
  2148  			return controller.protocolSelectionConstraints.selectProtocol(
  2149  				controller.establishConnectTunnelCount,
  2150  				excludeIntensive,
  2151  				serverEntry)
  2152  		}
  2153  
  2154  		// MakeDialParameters may return a replay instance, if the server
  2155  		// entry has a previous, recent successful connection and
  2156  		// tactics/config has not changed.
  2157  		//
  2158  		// In the first round -- and later rounds, with some probability -- of
  2159  		// establishing, ServerEntryIterator will move potential replay candidates
  2160  		// to the front of the iterator after the random shuffle, which greatly
  2161  		// prioritizes previously successful servers for that round.
  2162  		//
  2163  		// As ServerEntryIterator does not unmarshal and validate replay
  2164  		// candidate dial parameters, some potential replay candidates may
  2165  		// have expired or otherwise ineligible dial parameters; in this case
  2166  		// the candidate proceeds without replay.
  2167  		//
  2168  		// The ReplayCandidateCount tactic determines how many candidates may use
  2169  		// replay. After ReplayCandidateCount candidates of any type, replay or no,
  2170  		// replay is skipped. If ReplayCandidateCount exceeds the intial round,
  2171  		// replay may still be performed but the iterator may no longer move
  2172  		// potential replay server entries to the front. When ReplayCandidateCount
  2173  		// is set to -1, unlimited candidates may use replay.
  2174  
  2175  		dialParams, err := MakeDialParameters(
  2176  			controller.config,
  2177  			upstreamProxyErrorCallback,
  2178  			canReplay,
  2179  			selectProtocol,
  2180  			candidateServerEntry.serverEntry,
  2181  			false,
  2182  			controller.establishConnectTunnelCount,
  2183  			int(atomic.LoadInt32(&controller.establishedTunnelsCount)))
  2184  		if dialParams == nil || err != nil {
  2185  
  2186  			controller.concurrentEstablishTunnelsMutex.Unlock()
  2187  
  2188  			// MakeDialParameters returns nil/nil when the server entry is to
  2189  			// be skipped. See MakeDialParameters for skip cases and skip
  2190  			// logging. Silently fail the candidate in this case. Otherwise,
  2191  			// emit error.
  2192  			if err != nil {
  2193  				NoticeInfo("failed to make dial parameters for %s: %v",
  2194  					candidateServerEntry.serverEntry.GetDiagnosticID(),
  2195  					errors.Trace(err))
  2196  			}
  2197  
  2198  			// Unblock other candidates immediately when server affinity
  2199  			// candidate is skipped.
  2200  			if candidateServerEntry.isServerAffinityCandidate {
  2201  				close(controller.serverAffinityDoneBroadcast)
  2202  			}
  2203  
  2204  			continue
  2205  		}
  2206  
  2207  		// Increment establishConnectTunnelCount only after selectProtocol has
  2208  		// succeeded to ensure InitialLimitTunnelProtocolsCandidateCount
  2209  		// candidates use InitialLimitTunnelProtocols.
  2210  		establishConnectTunnelCount := controller.establishConnectTunnelCount
  2211  		controller.establishConnectTunnelCount += 1
  2212  
  2213  		isIntensive := protocol.TunnelProtocolIsResourceIntensive(dialParams.TunnelProtocol)
  2214  
  2215  		if isIntensive {
  2216  			controller.concurrentIntensiveEstablishTunnels += 1
  2217  			if controller.concurrentIntensiveEstablishTunnels > controller.peakConcurrentIntensiveEstablishTunnels {
  2218  				controller.peakConcurrentIntensiveEstablishTunnels = controller.concurrentIntensiveEstablishTunnels
  2219  			}
  2220  		}
  2221  		controller.concurrentEstablishTunnels += 1
  2222  		if controller.concurrentEstablishTunnels > controller.peakConcurrentEstablishTunnels {
  2223  			controller.peakConcurrentEstablishTunnels = controller.concurrentEstablishTunnels
  2224  		}
  2225  
  2226  		controller.concurrentEstablishTunnelsMutex.Unlock()
  2227  
  2228  		// Apply stagger only now that we're past MakeDialParameters and
  2229  		// protocol selection logic which may have caused the candidate to be
  2230  		// skipped. The stagger logic delays dialing, and we don't want to
  2231  		// incur that delay when skipping.
  2232  		//
  2233  		// Locking staggerMutex serializes staggers, so that multiple workers
  2234  		// don't simply sleep in parallel.
  2235  		//
  2236  		// The stagger is applied when establishConnectTunnelCount > 0 -- that
  2237  		// is, for all but the first dial.
  2238  
  2239  		p := controller.config.GetParameters().Get()
  2240  		staggerPeriod := p.Duration(parameters.StaggerConnectionWorkersPeriod)
  2241  		staggerJitter := p.Float(parameters.StaggerConnectionWorkersJitter)
  2242  		p.Close()
  2243  
  2244  		if establishConnectTunnelCount > 0 && staggerPeriod != 0 {
  2245  			controller.staggerMutex.Lock()
  2246  			timer := time.NewTimer(prng.JitterDuration(staggerPeriod, staggerJitter))
  2247  			select {
  2248  			case <-timer.C:
  2249  			case <-controller.establishCtx.Done():
  2250  			}
  2251  			timer.Stop()
  2252  			controller.staggerMutex.Unlock()
  2253  		}
  2254  
  2255  		// ConnectTunnel will allocate significant memory, so first attempt to
  2256  		// reclaim as much as possible.
  2257  		DoGarbageCollection()
  2258  
  2259  		tunnel, err := ConnectTunnel(
  2260  			controller.establishCtx,
  2261  			controller.config,
  2262  			candidateServerEntry.adjustedEstablishStartTime,
  2263  			dialParams)
  2264  
  2265  		controller.concurrentEstablishTunnelsMutex.Lock()
  2266  		if isIntensive {
  2267  			controller.concurrentIntensiveEstablishTunnels -= 1
  2268  		}
  2269  		controller.concurrentEstablishTunnels -= 1
  2270  		controller.concurrentEstablishTunnelsMutex.Unlock()
  2271  
  2272  		// Periodically emit memory metrics during the establishment cycle.
  2273  		if !controller.isStopEstablishing() {
  2274  			emitMemoryMetrics()
  2275  		}
  2276  
  2277  		// Immediately reclaim memory allocated by the establishment. In the case
  2278  		// of failure, first clear the reference to the tunnel. In the case of
  2279  		// success, the garbage collection may still be effective as the initial
  2280  		// phases of some protocols involve significant memory allocation that
  2281  		// could now be reclaimed.
  2282  		if err != nil {
  2283  			tunnel = nil
  2284  		}
  2285  		DoGarbageCollection()
  2286  
  2287  		if err != nil {
  2288  
  2289  			// Unblock other candidates immediately when server affinity
  2290  			// candidate fails.
  2291  			if candidateServerEntry.isServerAffinityCandidate {
  2292  				close(controller.serverAffinityDoneBroadcast)
  2293  			}
  2294  
  2295  			// Before emitting error, check if establish interrupted, in which
  2296  			// case the error is noise.
  2297  			if controller.isStopEstablishing() {
  2298  				break loop
  2299  			}
  2300  
  2301  			NoticeInfo("failed to connect to %s: %v",
  2302  				candidateServerEntry.serverEntry.GetDiagnosticID(),
  2303  				errors.Trace(err))
  2304  
  2305  			continue
  2306  		}
  2307  
  2308  		// Deliver connected tunnel.
  2309  		// Don't block. Assumes the receiver has a buffer large enough for
  2310  		// the number of desired tunnels. If there's no room, the tunnel must
  2311  		// not be required so it's discarded.
  2312  		select {
  2313  		case controller.connectedTunnels <- tunnel:
  2314  		default:
  2315  			controller.discardTunnel(tunnel)
  2316  
  2317  			// Clear the reference to this discarded tunnel and immediately run
  2318  			// a garbage collection to reclaim its memory.
  2319  			tunnel = nil
  2320  			DoGarbageCollection()
  2321  		}
  2322  
  2323  		// Unblock other candidates only after delivering when
  2324  		// server affinity candidate succeeds.
  2325  		if candidateServerEntry.isServerAffinityCandidate {
  2326  			close(controller.serverAffinityDoneBroadcast)
  2327  		}
  2328  	}
  2329  }
  2330  
  2331  func (controller *Controller) isStopEstablishing() bool {
  2332  	select {
  2333  	case <-controller.establishCtx.Done():
  2334  		return true
  2335  	default:
  2336  	}
  2337  	return false
  2338  }