github.com/Psiphon-Labs/psiphon-tunnel-core@v2.0.28+incompatible/psiphon/server/services.go (about)

     1  /*
     2   * Copyright (c) 2016, Psiphon Inc.
     3   * All rights reserved.
     4   *
     5   * This program is free software: you can redistribute it and/or modify
     6   * it under the terms of the GNU General Public License as published by
     7   * the Free Software Foundation, either version 3 of the License, or
     8   * (at your option) any later version.
     9   *
    10   * This program is distributed in the hope that it will be useful,
    11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
    12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13   * GNU General Public License for more details.
    14   *
    15   * You should have received a copy of the GNU General Public License
    16   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17   *
    18   */
    19  
    20  // Package server implements the core tunnel functionality of a Psiphon server.
    21  // The main function is RunServices, which runs one or all of a Psiphon API web server,
    22  // a tunneling SSH server, and an Obfuscated SSH protocol server. The server configuration
    23  // is created by the GenerateConfig function.
    24  package server
    25  
    26  import (
    27  	"fmt"
    28  	"math/rand"
    29  	"os"
    30  	"os/signal"
    31  	"runtime"
    32  	"runtime/debug"
    33  	"sync"
    34  	"syscall"
    35  	"time"
    36  
    37  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
    38  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/buildinfo"
    39  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
    40  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/osl"
    41  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/packetman"
    42  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/tactics"
    43  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/tun"
    44  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/server/psinet"
    45  )
    46  
    47  // RunServices initializes support functions including logging and GeoIP services;
    48  // and then starts the server components and runs them until os.Interrupt or
    49  // os.Kill signals are received. The config determines which components are run.
    50  func RunServices(configJSON []byte) (retErr error) {
    51  
    52  	loggingInitialized := false
    53  
    54  	defer func() {
    55  		if retErr != nil && loggingInitialized {
    56  			log.WithTraceFields(LogFields{"error": retErr}).Error("RunServices failed")
    57  		}
    58  	}()
    59  
    60  	rand.Seed(int64(time.Now().Nanosecond()))
    61  
    62  	config, err := LoadConfig(configJSON)
    63  	if err != nil {
    64  		return errors.Trace(err)
    65  	}
    66  
    67  	err = InitLogging(config)
    68  	if err != nil {
    69  		return errors.Trace(err)
    70  	}
    71  
    72  	loggingInitialized = true
    73  
    74  	support, err := NewSupportServices(config)
    75  	if err != nil {
    76  		return errors.Trace(err)
    77  	}
    78  
    79  	startupFields := buildinfo.GetBuildInfo().ToMap()
    80  	startupFields["GODEBUG"] = os.Getenv("GODEBUG")
    81  	log.WithTraceFields(startupFields).Info("startup")
    82  
    83  	waitGroup := new(sync.WaitGroup)
    84  	shutdownBroadcast := make(chan struct{})
    85  	errorChannel := make(chan error, 1)
    86  
    87  	tunnelServer, err := NewTunnelServer(support, shutdownBroadcast)
    88  	if err != nil {
    89  		return errors.Trace(err)
    90  	}
    91  
    92  	support.TunnelServer = tunnelServer
    93  
    94  	if config.RunPacketTunnel {
    95  
    96  		packetTunnelServer, err := tun.NewServer(&tun.ServerConfig{
    97  			Logger:                      CommonLogger(log),
    98  			SudoNetworkConfigCommands:   config.PacketTunnelSudoNetworkConfigCommands,
    99  			GetDNSResolverIPv4Addresses: support.DNSResolver.GetAllIPv4,
   100  			GetDNSResolverIPv6Addresses: support.DNSResolver.GetAllIPv6,
   101  			EnableDNSFlowTracking:       config.PacketTunnelEnableDNSFlowTracking,
   102  			EgressInterface:             config.PacketTunnelEgressInterface,
   103  			DownstreamPacketQueueSize:   config.PacketTunnelDownstreamPacketQueueSize,
   104  			SessionIdleExpirySeconds:    config.PacketTunnelSessionIdleExpirySeconds,
   105  			AllowBogons:                 config.AllowBogons,
   106  		})
   107  		if err != nil {
   108  			return errors.Trace(err)
   109  		}
   110  
   111  		support.PacketTunnelServer = packetTunnelServer
   112  	}
   113  
   114  	if config.RunPacketManipulator {
   115  
   116  		packetManipulatorConfig, err := makePacketManipulatorConfig(support)
   117  		if err != nil {
   118  			return errors.Trace(err)
   119  		}
   120  
   121  		packetManipulator, err := packetman.NewManipulator(packetManipulatorConfig)
   122  		if err != nil {
   123  			return errors.Trace(err)
   124  		}
   125  
   126  		support.PacketManipulator = packetManipulator
   127  	}
   128  
   129  	// After this point, errors should be delivered to the errors channel and
   130  	// orderly shutdown should flow through to the end of the function to ensure
   131  	// all workers are synchronously stopped.
   132  
   133  	if config.RunPacketTunnel {
   134  		support.PacketTunnelServer.Start()
   135  		waitGroup.Add(1)
   136  		go func() {
   137  			defer waitGroup.Done()
   138  			<-shutdownBroadcast
   139  			support.PacketTunnelServer.Stop()
   140  		}()
   141  	}
   142  
   143  	if config.RunPacketManipulator {
   144  		err := support.PacketManipulator.Start()
   145  		if err != nil {
   146  			select {
   147  			case errorChannel <- err:
   148  			default:
   149  			}
   150  		} else {
   151  			waitGroup.Add(1)
   152  			go func() {
   153  				defer waitGroup.Done()
   154  				<-shutdownBroadcast
   155  				support.PacketManipulator.Stop()
   156  			}()
   157  		}
   158  	}
   159  
   160  	if config.RunLoadMonitor() {
   161  		waitGroup.Add(1)
   162  		go func() {
   163  			waitGroup.Done()
   164  			ticker := time.NewTicker(time.Duration(config.LoadMonitorPeriodSeconds) * time.Second)
   165  			defer ticker.Stop()
   166  			for {
   167  				select {
   168  				case <-shutdownBroadcast:
   169  					return
   170  				case <-ticker.C:
   171  					logServerLoad(support)
   172  				}
   173  			}
   174  		}()
   175  	}
   176  
   177  	if config.RunPeriodicGarbageCollection() {
   178  		waitGroup.Add(1)
   179  		go func() {
   180  			waitGroup.Done()
   181  			ticker := time.NewTicker(config.periodicGarbageCollection)
   182  			defer ticker.Stop()
   183  			for {
   184  				select {
   185  				case <-shutdownBroadcast:
   186  					return
   187  				case <-ticker.C:
   188  					debug.FreeOSMemory()
   189  				}
   190  			}
   191  		}()
   192  	}
   193  
   194  	if config.RunWebServer() {
   195  		waitGroup.Add(1)
   196  		go func() {
   197  			defer waitGroup.Done()
   198  			err := RunWebServer(support, shutdownBroadcast)
   199  			select {
   200  			case errorChannel <- err:
   201  			default:
   202  			}
   203  		}()
   204  	}
   205  
   206  	// The tunnel server is always run; it launches multiple
   207  	// listeners, depending on which tunnel protocols are enabled.
   208  	waitGroup.Add(1)
   209  	go func() {
   210  		defer waitGroup.Done()
   211  		err := tunnelServer.Run()
   212  		select {
   213  		case errorChannel <- err:
   214  		default:
   215  		}
   216  	}()
   217  
   218  	// Shutdown doesn't wait for the outputProcessProfiles goroutine
   219  	// to complete, as it may be sleeping while running a "block" or
   220  	// CPU profile.
   221  	signalProcessProfiles := make(chan struct{}, 1)
   222  	go func() {
   223  		for {
   224  			select {
   225  			case <-signalProcessProfiles:
   226  				outputProcessProfiles(support.Config, "")
   227  			case <-shutdownBroadcast:
   228  				return
   229  			}
   230  		}
   231  	}()
   232  
   233  	// In addition to the actual signal handling here, there is
   234  	// a list of signals that need to be passed through panicwrap
   235  	// in 'github.com/Psiphon-Labs/psiphon-tunnel-core/Server/main.go'
   236  	// where 'panicwrap.Wrap' is called. The handled signals below, and the
   237  	// list there must be kept in sync to ensure proper signal handling
   238  
   239  	// An OS signal triggers an orderly shutdown
   240  	systemStopSignal := make(chan os.Signal, 1)
   241  	signal.Notify(systemStopSignal, os.Interrupt, syscall.SIGTERM)
   242  
   243  	// SIGUSR1 triggers a reload of support services
   244  	reloadSupportServicesSignal := makeSIGUSR1Channel()
   245  
   246  	// SIGUSR2 triggers an immediate load log and optional process profile output
   247  	logServerLoadSignal := makeSIGUSR2Channel()
   248  
   249  	// SIGTSTP triggers tunnelServer to stop establishing new tunnels
   250  	stopEstablishingTunnelsSignal := makeSIGTSTPChannel()
   251  
   252  	// SIGCONT triggers tunnelServer to resume establishing new tunnels
   253  	resumeEstablishingTunnelsSignal := makeSIGCONTChannel()
   254  
   255  	err = nil
   256  
   257  loop:
   258  	for {
   259  		select {
   260  		case <-stopEstablishingTunnelsSignal:
   261  			tunnelServer.SetEstablishTunnels(false)
   262  
   263  			if config.DumpProfilesOnStopEstablishTunnels(
   264  				tunnelServer.GetEstablishedClientCount()) {
   265  
   266  				// Run the profile dump in a goroutine and don't block this loop. Shutdown
   267  				// doesn't wait for any running outputProcessProfiles to complete.
   268  				go func() {
   269  					outputProcessProfiles(support.Config, "stop_establish_tunnels")
   270  				}()
   271  			}
   272  
   273  		case <-resumeEstablishingTunnelsSignal:
   274  			tunnelServer.SetEstablishTunnels(true)
   275  
   276  		case <-reloadSupportServicesSignal:
   277  			support.Reload()
   278  
   279  		case <-logServerLoadSignal:
   280  			// Signal profiles writes first to ensure some diagnostics are
   281  			// available in case logServerLoad hangs (which has happened
   282  			// in the past due to a deadlock bug).
   283  			select {
   284  			case signalProcessProfiles <- struct{}{}:
   285  			default:
   286  			}
   287  			logServerLoad(support)
   288  
   289  		case <-systemStopSignal:
   290  			log.WithTrace().Info("shutdown by system")
   291  			break loop
   292  
   293  		case err = <-errorChannel:
   294  			break loop
   295  		}
   296  	}
   297  
   298  	// During any delayed or hung shutdown, periodically dump profiles to help
   299  	// diagnose the cause. Shutdown doesn't wait for any running
   300  	// outputProcessProfiles to complete.
   301  	signalProfileDumperStop := make(chan struct{})
   302  	go func() {
   303  		tickSeconds := 10
   304  		ticker := time.NewTicker(time.Duration(tickSeconds) * time.Second)
   305  		defer ticker.Stop()
   306  		for i := tickSeconds; i <= 60; i += tickSeconds {
   307  			select {
   308  			case <-signalProfileDumperStop:
   309  				return
   310  			case <-ticker.C:
   311  				filenameSuffix := fmt.Sprintf("delayed_shutdown_%ds", i)
   312  				outputProcessProfiles(support.Config, filenameSuffix)
   313  			}
   314  		}
   315  	}()
   316  
   317  	close(shutdownBroadcast)
   318  	waitGroup.Wait()
   319  
   320  	close(signalProfileDumperStop)
   321  
   322  	return err
   323  }
   324  
   325  func getRuntimeMetrics() LogFields {
   326  
   327  	numGoroutine := runtime.NumGoroutine()
   328  
   329  	var memStats runtime.MemStats
   330  	runtime.ReadMemStats(&memStats)
   331  
   332  	lastGC := ""
   333  	if memStats.LastGC > 0 {
   334  		lastGC = time.Unix(0, int64(memStats.LastGC)).UTC().Format(time.RFC3339)
   335  	}
   336  
   337  	return LogFields{
   338  		"num_goroutine": numGoroutine,
   339  		"heap_alloc":    memStats.HeapAlloc,
   340  		"heap_sys":      memStats.HeapSys,
   341  		"heap_idle":     memStats.HeapIdle,
   342  		"heap_inuse":    memStats.HeapInuse,
   343  		"heap_released": memStats.HeapReleased,
   344  		"heap_objects":  memStats.HeapObjects,
   345  		"num_gc":        memStats.NumGC,
   346  		"num_forced_gc": memStats.NumForcedGC,
   347  		"last_gc":       lastGC,
   348  	}
   349  }
   350  
   351  func outputProcessProfiles(config *Config, filenameSuffix string) {
   352  
   353  	log.WithTraceFields(getRuntimeMetrics()).Info("runtime_metrics")
   354  
   355  	if config.ProcessProfileOutputDirectory != "" {
   356  		common.WriteRuntimeProfiles(
   357  			CommonLogger(log),
   358  			config.ProcessProfileOutputDirectory,
   359  			filenameSuffix,
   360  			config.ProcessBlockProfileDurationSeconds,
   361  			config.ProcessCPUProfileDurationSeconds)
   362  	}
   363  }
   364  
   365  func logServerLoad(support *SupportServices) {
   366  
   367  	serverLoad := getRuntimeMetrics()
   368  
   369  	serverLoad["event_name"] = "server_load"
   370  
   371  	establishTunnels, establishLimitedCount :=
   372  		support.TunnelServer.GetEstablishTunnelsMetrics()
   373  	serverLoad["establish_tunnels"] = establishTunnels
   374  	serverLoad["establish_tunnels_limited_count"] = establishLimitedCount
   375  
   376  	serverLoad.Add(support.ReplayCache.GetMetrics())
   377  
   378  	serverLoad.Add(support.ServerTacticsParametersCache.GetMetrics())
   379  
   380  	upstreamStats, protocolStats, regionStats :=
   381  		support.TunnelServer.GetLoadStats()
   382  
   383  	for name, value := range upstreamStats {
   384  		serverLoad[name] = value
   385  	}
   386  
   387  	for protocol, stats := range protocolStats {
   388  		serverLoad[protocol] = stats
   389  	}
   390  
   391  	log.LogRawFieldsWithTimestamp(serverLoad)
   392  
   393  	for region, regionProtocolStats := range regionStats {
   394  
   395  		serverLoad := LogFields{
   396  			"event_name": "server_load",
   397  			"region":     region,
   398  		}
   399  
   400  		for protocol, stats := range regionProtocolStats {
   401  			serverLoad[protocol] = stats
   402  		}
   403  
   404  		log.LogRawFieldsWithTimestamp(serverLoad)
   405  	}
   406  }
   407  
   408  func logIrregularTunnel(
   409  	support *SupportServices,
   410  	listenerTunnelProtocol string,
   411  	listenerPort int,
   412  	clientIP string,
   413  	tunnelError error,
   414  	logFields LogFields) {
   415  
   416  	if logFields == nil {
   417  		logFields = make(LogFields)
   418  	}
   419  
   420  	logFields["event_name"] = "irregular_tunnel"
   421  	logFields["listener_protocol"] = listenerTunnelProtocol
   422  	logFields["listener_port_number"] = listenerPort
   423  	support.GeoIPService.Lookup(clientIP).SetLogFields(logFields)
   424  	logFields["tunnel_error"] = tunnelError.Error()
   425  	log.LogRawFieldsWithTimestamp(logFields)
   426  }
   427  
   428  // SupportServices carries common and shared data components
   429  // across different server components. SupportServices implements a
   430  // hot reload of traffic rules, psinet database, and geo IP database
   431  // components, which allows these data components to be refreshed
   432  // without restarting the server process.
   433  type SupportServices struct {
   434  	Config                       *Config
   435  	TrafficRulesSet              *TrafficRulesSet
   436  	OSLConfig                    *osl.Config
   437  	PsinetDatabase               *psinet.Database
   438  	GeoIPService                 *GeoIPService
   439  	DNSResolver                  *DNSResolver
   440  	TunnelServer                 *TunnelServer
   441  	PacketTunnelServer           *tun.Server
   442  	TacticsServer                *tactics.Server
   443  	Blocklist                    *Blocklist
   444  	PacketManipulator            *packetman.Manipulator
   445  	ReplayCache                  *ReplayCache
   446  	ServerTacticsParametersCache *ServerTacticsParametersCache
   447  }
   448  
   449  // NewSupportServices initializes a new SupportServices.
   450  func NewSupportServices(config *Config) (*SupportServices, error) {
   451  
   452  	trafficRulesSet, err := NewTrafficRulesSet(config.TrafficRulesFilename)
   453  	if err != nil {
   454  		return nil, errors.Trace(err)
   455  	}
   456  
   457  	oslConfig, err := osl.NewConfig(config.OSLConfigFilename)
   458  	if err != nil {
   459  		return nil, errors.Trace(err)
   460  	}
   461  
   462  	psinetDatabase, err := psinet.NewDatabase(config.PsinetDatabaseFilename)
   463  	if err != nil {
   464  		return nil, errors.Trace(err)
   465  	}
   466  
   467  	geoIPService, err := NewGeoIPService(config.GeoIPDatabaseFilenames)
   468  	if err != nil {
   469  		return nil, errors.Trace(err)
   470  	}
   471  
   472  	dnsResolver, err := NewDNSResolver(config.DNSResolverIPAddress)
   473  	if err != nil {
   474  		return nil, errors.Trace(err)
   475  	}
   476  
   477  	blocklist, err := NewBlocklist(config.BlocklistFilename)
   478  	if err != nil {
   479  		return nil, errors.Trace(err)
   480  	}
   481  
   482  	tacticsServer, err := tactics.NewServer(
   483  		CommonLogger(log),
   484  		getTacticsAPIParameterLogFieldFormatter(),
   485  		getTacticsAPIParameterValidator(config),
   486  		config.TacticsConfigFilename)
   487  	if err != nil {
   488  		return nil, errors.Trace(err)
   489  	}
   490  
   491  	support := &SupportServices{
   492  		Config:          config,
   493  		TrafficRulesSet: trafficRulesSet,
   494  		OSLConfig:       oslConfig,
   495  		PsinetDatabase:  psinetDatabase,
   496  		GeoIPService:    geoIPService,
   497  		DNSResolver:     dnsResolver,
   498  		TacticsServer:   tacticsServer,
   499  		Blocklist:       blocklist,
   500  	}
   501  
   502  	support.ReplayCache = NewReplayCache(support)
   503  
   504  	support.ServerTacticsParametersCache =
   505  		NewServerTacticsParametersCache(support)
   506  
   507  	return support, nil
   508  }
   509  
   510  // Reload reinitializes traffic rules, psinet database, and geo IP database
   511  // components. If any component fails to reload, an error is logged and
   512  // Reload proceeds, using the previous state of the component.
   513  func (support *SupportServices) Reload() {
   514  
   515  	reloaders := append(
   516  		[]common.Reloader{
   517  			support.TrafficRulesSet,
   518  			support.OSLConfig,
   519  			support.PsinetDatabase,
   520  			support.TacticsServer,
   521  			support.Blocklist},
   522  		support.GeoIPService.Reloaders()...)
   523  
   524  	// Note: established clients aren't notified when tactics change after a
   525  	// reload; new tactics will be obtained on the next client handshake or
   526  	// tactics request.
   527  
   528  	reloadTactics := func() {
   529  
   530  		// Don't use stale tactics.
   531  		support.ReplayCache.Flush()
   532  		support.ServerTacticsParametersCache.Flush()
   533  
   534  		if support.Config.RunPacketManipulator {
   535  			err := reloadPacketManipulationSpecs(support)
   536  			if err != nil {
   537  				log.WithTraceFields(
   538  					LogFields{"error": errors.Trace(err)}).Warning(
   539  					"failed to reload packet manipulation specs")
   540  			}
   541  		}
   542  	}
   543  
   544  	// Take these actions only after the corresponding Reloader has reloaded.
   545  	// In both the traffic rules and OSL cases, there is some impact from state
   546  	// reset, so the reset should be avoided where possible.
   547  	reloadPostActions := map[common.Reloader]func(){
   548  		support.TrafficRulesSet: func() { support.TunnelServer.ResetAllClientTrafficRules() },
   549  		support.OSLConfig:       func() { support.TunnelServer.ResetAllClientOSLConfigs() },
   550  		support.TacticsServer:   reloadTactics,
   551  	}
   552  
   553  	for _, reloader := range reloaders {
   554  
   555  		if !reloader.WillReload() {
   556  			// Skip logging
   557  			continue
   558  		}
   559  
   560  		// "reloaded" flag indicates if file was actually reloaded or ignored
   561  		reloaded, err := reloader.Reload()
   562  
   563  		if reloaded {
   564  			if action, ok := reloadPostActions[reloader]; ok {
   565  				action()
   566  			}
   567  		}
   568  
   569  		if err != nil {
   570  			log.WithTraceFields(
   571  				LogFields{
   572  					"reloader": reloader.LogDescription(),
   573  					"error":    err}).Error("reload failed")
   574  			// Keep running with previous state
   575  		} else {
   576  			log.WithTraceFields(
   577  				LogFields{
   578  					"reloader": reloader.LogDescription(),
   579  					"reloaded": reloaded}).Info("reload success")
   580  		}
   581  	}
   582  }