github.com/cloud-foundations/dominator@v0.0.0-20221004181915-6e4fee580046/cmd/subd/main.go (about)

     1  package main
     2  
     3  import (
     4  	"flag"
     5  	"fmt"
     6  	"io"
     7  	"os"
     8  	"os/signal"
     9  	"path"
    10  	"runtime"
    11  	"strconv"
    12  	"strings"
    13  	"syscall"
    14  
    15  	"github.com/Cloud-Foundations/Dominator/lib/constants"
    16  	"github.com/Cloud-Foundations/Dominator/lib/cpulimiter"
    17  	"github.com/Cloud-Foundations/Dominator/lib/filter"
    18  	"github.com/Cloud-Foundations/Dominator/lib/flags/loadflags"
    19  	"github.com/Cloud-Foundations/Dominator/lib/flagutil"
    20  	"github.com/Cloud-Foundations/Dominator/lib/format"
    21  	"github.com/Cloud-Foundations/Dominator/lib/fsbench"
    22  	"github.com/Cloud-Foundations/Dominator/lib/fsrateio"
    23  	"github.com/Cloud-Foundations/Dominator/lib/html"
    24  	"github.com/Cloud-Foundations/Dominator/lib/log/serverlogger"
    25  	"github.com/Cloud-Foundations/Dominator/lib/memstats"
    26  	"github.com/Cloud-Foundations/Dominator/lib/netspeed"
    27  	"github.com/Cloud-Foundations/Dominator/lib/rateio"
    28  	"github.com/Cloud-Foundations/Dominator/lib/srpc/setupserver"
    29  	"github.com/Cloud-Foundations/Dominator/lib/wsyscall"
    30  	"github.com/Cloud-Foundations/Dominator/proto/sub"
    31  	"github.com/Cloud-Foundations/Dominator/sub/httpd"
    32  	"github.com/Cloud-Foundations/Dominator/sub/rpcd"
    33  	"github.com/Cloud-Foundations/Dominator/sub/scanner"
    34  	"github.com/Cloud-Foundations/tricorder/go/tricorder"
    35  	"github.com/Cloud-Foundations/tricorder/go/tricorder/units"
    36  )
    37  
    38  var (
    39  	configDirectory = flag.String("configDirectory", "/etc/subd/conf.d",
    40  		"Directory of optional JSON configuration files")
    41  	defaultCpuPercent = flag.Uint("defaultCpuPercent", 0,
    42  		"CPU speed as percentage of capacity (default 50)")
    43  	defaultNetworkSpeedPercent = flag.Uint("defaultNetworkSpeedPercent", 0,
    44  		"Network speed as percentage of capacity (default 10)")
    45  	defaultScanSpeedPercent = flag.Uint("defaultScanSpeedPercent", 0,
    46  		"Scan speed as percentage of capacity (default 2)")
    47  	maxThreads = flag.Uint("maxThreads", 1,
    48  		"Maximum number of parallel OS threads to use")
    49  	permitInsecureMode = flag.Bool("permitInsecureMode", false,
    50  		"If true, run in insecure mode. This gives remote root access to all")
    51  	pidfile = flag.String("pidfile", "/var/run/subd.pid",
    52  		"Name of file to write my PID to")
    53  	portNum = flag.Uint("portNum", constants.SubPortNumber,
    54  		"Port number to allocate and listen on for HTTP/RPC")
    55  	rootDeviceBytesPerSecond flagutil.Size
    56  	rootDir                  = flag.String("rootDir", "/",
    57  		"Name of root of directory tree to manage")
    58  	scanExcludeList flagutil.StringList
    59  	showStats       = flag.Bool("showStats", false,
    60  		"If true, show statistics after each cycle")
    61  	subdDir = flag.String("subdDir", ".subd",
    62  		"Name of subd private directory, relative to rootDir. This must be on the same file-system as rootDir")
    63  	testExternallyPatchable = flag.Bool("testExternallyPatchable", false,
    64  		"If true, test if externally patchable and exit=0 if so or exit=1 if not")
    65  	unshare = flag.Bool("unshare", true, "Internal use only.")
    66  )
    67  
    68  func init() {
    69  	runtime.LockOSThread()
    70  	flag.Var(&rootDeviceBytesPerSecond, "rootDeviceBytesPerSecond",
    71  		"Fallback root device speed (default 0)")
    72  	flag.Var(&scanExcludeList, "scanExcludeList",
    73  		`Comma separated list of patterns to exclude from scanning (default `+strings.Join(constants.ScanExcludeList, ",")+`")`)
    74  }
    75  
    76  func sanityCheck() bool {
    77  	r_devnum, err := fsbench.GetDevnumForFile(*rootDir)
    78  	if err != nil {
    79  		fmt.Fprintf(os.Stderr, "Unable to get device number for: %s: %s\n",
    80  			*rootDir, err)
    81  		return false
    82  	}
    83  	subdDirPathname := path.Join(*rootDir, *subdDir)
    84  	s_devnum, err := fsbench.GetDevnumForFile(subdDirPathname)
    85  	if err != nil {
    86  		fmt.Fprintf(os.Stderr, "Unable to get device number for: %s: %s\n",
    87  			subdDirPathname, err)
    88  		return false
    89  	}
    90  	if r_devnum != s_devnum {
    91  		fmt.Fprintf(os.Stderr,
    92  			"rootDir and subdDir must be on the same file-system\n")
    93  		return false
    94  	}
    95  	return true
    96  }
    97  
    98  func createDirectory(dirname string) bool {
    99  	if err := os.MkdirAll(dirname, 0750); err != nil {
   100  		fmt.Fprintf(os.Stderr, "Unable to create directory: %s: %s\n",
   101  			dirname, err)
   102  		return false
   103  	}
   104  	return true
   105  }
   106  
   107  func mountTmpfs(dirname string) bool {
   108  	var statfs syscall.Statfs_t
   109  	if err := syscall.Statfs(dirname, &statfs); err != nil {
   110  		fmt.Fprintf(os.Stderr, "Unable to create Statfs: %s: %s\n",
   111  			dirname, err)
   112  		return false
   113  	}
   114  	if statfs.Type != 0x01021994 {
   115  		err := wsyscall.Mount("none", dirname, "tmpfs", 0,
   116  			"size=65536,mode=0750")
   117  		if err == nil {
   118  			fmt.Printf("Mounted tmpfs on: %s\n", dirname)
   119  		} else {
   120  			fmt.Fprintf(os.Stderr, "Unable to mount tmpfs on: %s: %s\n",
   121  				dirname, err)
   122  			return false
   123  		}
   124  	}
   125  	return true
   126  }
   127  
   128  func unshareAndBind(workingRootDir string) bool {
   129  	if *unshare {
   130  		// Re-exec myself using the unshare syscall while on a locked thread.
   131  		// This hack is required because syscall.Unshare() operates on only one
   132  		// thread in the process, and Go switches execution between threads
   133  		// randomly. Thus, the namespace can be suddenly switched for running
   134  		// code. This is an aspect of Go that was not well thought out.
   135  		runtime.LockOSThread()
   136  		if err := wsyscall.UnshareMountNamespace(); err != nil {
   137  			fmt.Fprintf(os.Stderr, "Unable to unshare mount namesace: %s\n",
   138  				err)
   139  			return false
   140  		}
   141  		// Ensure the process is slightly niced. Since the Linux implementation
   142  		// of setpriority(2) only applies to a thread, not the whole process
   143  		// (contrary to the POSIX specification), do this in the pinned OS
   144  		// thread so that the whole process (after exec) will be niced.
   145  		syscall.Setpriority(syscall.PRIO_PROCESS, 0, 1)
   146  		args := append(os.Args, "-unshare=false")
   147  		if err := syscall.Exec(args[0], args, os.Environ()); err != nil {
   148  			fmt.Fprintf(os.Stderr, "Unable to Exec:%s: %s\n", args[0], err)
   149  			return false
   150  		}
   151  	}
   152  	syscall.Unmount(workingRootDir, 0)
   153  	err := wsyscall.Mount(*rootDir, workingRootDir, "", wsyscall.MS_BIND, "")
   154  	if err != nil {
   155  		fmt.Fprintf(os.Stderr, "Unable to bind mount %s to %s: %s\n",
   156  			*rootDir, workingRootDir, err)
   157  		return false
   158  	}
   159  	// Clean up -unshare=false so that a subsequent re-exec starts from scratch.
   160  	args := make([]string, 0, len(os.Args)-1)
   161  	for _, arg := range os.Args {
   162  		if arg != "-unshare=false" {
   163  			args = append(args, arg)
   164  		}
   165  	}
   166  	os.Args = args
   167  	return true
   168  }
   169  
   170  func getCachedFsSpeed(workingRootDir string,
   171  	cacheDirname string) (bytesPerSecond, blocksPerSecond uint64,
   172  	computed, ok bool) {
   173  	bytesPerSecond = 0
   174  	blocksPerSecond = 0
   175  	devnum, err := fsbench.GetDevnumForFile(workingRootDir)
   176  	if err != nil {
   177  		fmt.Fprintf(os.Stderr, "Unable to get device number for: %s: %s\n",
   178  			workingRootDir, err)
   179  		return 0, 0, false, false
   180  	}
   181  	fsbenchDir := path.Join(cacheDirname, "fsbench")
   182  	if !createDirectory(fsbenchDir) {
   183  		return 0, 0, false, false
   184  	}
   185  	cacheFilename := path.Join(fsbenchDir, strconv.FormatUint(devnum, 16))
   186  	file, err := os.Open(cacheFilename)
   187  	if err == nil {
   188  		n, err := fmt.Fscanf(file, "%d %d", &bytesPerSecond, &blocksPerSecond)
   189  		file.Close()
   190  		if n == 2 || err == nil {
   191  			return bytesPerSecond, blocksPerSecond, false, true
   192  		}
   193  	}
   194  	bytesPerSecond, blocksPerSecond, err = fsbench.GetReadSpeed(workingRootDir)
   195  	if err != nil {
   196  		fmt.Fprintf(os.Stderr, "Unable to measure read speed: %s\n", err)
   197  		return 0, 0, true, false
   198  	}
   199  	file, err = os.Create(cacheFilename)
   200  	if err != nil {
   201  		fmt.Fprintf(os.Stderr, "Unable to open: %s for write: %s\n",
   202  			cacheFilename, err)
   203  		return 0, 0, true, false
   204  	}
   205  	fmt.Fprintf(file, "%d %d\n", bytesPerSecond, blocksPerSecond)
   206  	file.Close()
   207  	return bytesPerSecond, blocksPerSecond, true, true
   208  }
   209  
   210  func publishFsSpeed(bytesPerSecond, blocksPerSecond uint64) {
   211  	tricorder.RegisterMetric("/root-read-speed", &bytesPerSecond,
   212  		units.BytePerSecond, "read speed of root file-system media")
   213  	tricorder.RegisterMetric("/root-block-read-speed", &blocksPerSecond,
   214  		units.None, "read speed of root file-system media in blocks/second")
   215  }
   216  
   217  func getCachedNetworkSpeed(cacheFilename string) uint64 {
   218  	if speed, ok := netspeed.GetSpeedToHost(""); ok {
   219  		return speed
   220  	}
   221  	file, err := os.Open(cacheFilename)
   222  	if err != nil {
   223  		return 0
   224  	}
   225  	defer file.Close()
   226  	var bytesPerSecond uint64
   227  	n, err := fmt.Fscanf(file, "%d", &bytesPerSecond)
   228  	if n == 1 || err == nil {
   229  		return bytesPerSecond
   230  	}
   231  	return 0
   232  }
   233  
   234  type DumpableFileSystemHistory struct {
   235  	fsh *scanner.FileSystemHistory
   236  }
   237  
   238  func (fsh *DumpableFileSystemHistory) WriteHtml(writer io.Writer) {
   239  	fs := fsh.fsh.FileSystem()
   240  	if fs == nil {
   241  		return
   242  	}
   243  	fmt.Fprintln(writer, "<pre>")
   244  	fs.List(writer)
   245  	fmt.Fprintln(writer, "</pre>")
   246  }
   247  
   248  func gracefulCleanup() {
   249  	os.Remove(*pidfile)
   250  	os.Exit(1)
   251  }
   252  
   253  func writePidfile() {
   254  	file, err := os.Create(*pidfile)
   255  	if err != nil {
   256  		fmt.Fprintln(os.Stderr, err.Error())
   257  		os.Exit(1)
   258  	}
   259  	defer file.Close()
   260  	fmt.Fprintln(file, os.Getpid())
   261  }
   262  
   263  func main() {
   264  	if err := loadflags.LoadForDaemon("subd"); err != nil {
   265  		fmt.Fprintln(os.Stderr, err)
   266  		os.Exit(1)
   267  	}
   268  	flag.Parse()
   269  	if *testExternallyPatchable {
   270  		runTestAndExit(checkExternallyPatchable)
   271  	}
   272  	tricorder.RegisterFlags()
   273  	subdDirPathname := path.Join(*rootDir, *subdDir)
   274  	workingRootDir := path.Join(subdDirPathname, "root")
   275  	objectsDir := path.Join(workingRootDir, *subdDir, "objects")
   276  	tmpDir := path.Join(subdDirPathname, "tmp")
   277  	netbenchFilename := path.Join(subdDirPathname, "netbench")
   278  	oldTriggersFilename := path.Join(subdDirPathname, "triggers.previous")
   279  	if !createDirectory(workingRootDir) {
   280  		os.Exit(1)
   281  	}
   282  	if !sanityCheck() {
   283  		os.Exit(1)
   284  	}
   285  	if !createDirectory(tmpDir) {
   286  		os.Exit(1)
   287  	}
   288  	if !mountTmpfs(tmpDir) {
   289  		os.Exit(1)
   290  	}
   291  	if !unshareAndBind(workingRootDir) {
   292  		os.Exit(1)
   293  	}
   294  	if !createDirectory(objectsDir) {
   295  		os.Exit(1)
   296  	}
   297  	runtime.GOMAXPROCS(int(*maxThreads))
   298  	logger := serverlogger.New("")
   299  	if err := setupserver.SetupTls(); err != nil {
   300  		if *permitInsecureMode {
   301  			logger.Println(err)
   302  		} else {
   303  			logger.Fatalln(err)
   304  		}
   305  	}
   306  	bytesPerSecond, blocksPerSecond, firstScan, ok := getCachedFsSpeed(
   307  		workingRootDir, tmpDir)
   308  	if !ok {
   309  		if rootDeviceBytesPerSecond < 1<<20 {
   310  			os.Exit(1)
   311  		}
   312  		bytesPerSecond = uint64(rootDeviceBytesPerSecond)
   313  		blocksPerSecond = bytesPerSecond >> 9
   314  		logger.Printf("Falling back to -rootDeviceBytesPerSecond option: %s\n",
   315  			format.FormatBytes(bytesPerSecond))
   316  	}
   317  	publishFsSpeed(bytesPerSecond, blocksPerSecond)
   318  	configParams := sub.Configuration{}
   319  	loadConfiguration(*configDirectory, &configParams, logger)
   320  	// Command-line flags override file configuration.
   321  	if *defaultCpuPercent > 0 {
   322  		configParams.CpuPercent = *defaultCpuPercent
   323  	}
   324  	if *defaultNetworkSpeedPercent > 0 {
   325  		configParams.NetworkSpeedPercent = *defaultNetworkSpeedPercent
   326  	}
   327  	if *defaultScanSpeedPercent > 0 {
   328  		configParams.ScanSpeedPercent = *defaultScanSpeedPercent
   329  	}
   330  	var configuration scanner.Configuration
   331  	configuration.CpuLimiter = cpulimiter.New(100)
   332  	configuration.DefaultCpuPercent = configParams.CpuPercent
   333  	// Apply built-in defaults if nothing specified.
   334  	if configuration.DefaultCpuPercent < 1 {
   335  		configuration.DefaultCpuPercent = constants.DefaultCpuPercent
   336  		go adjustVcpuLimit(&configuration.DefaultCpuPercent, logger)
   337  	}
   338  	if configParams.NetworkSpeedPercent < 1 {
   339  		configParams.NetworkSpeedPercent = constants.DefaultNetworkSpeedPercent
   340  	}
   341  	if configParams.ScanSpeedPercent < 1 {
   342  		configParams.ScanSpeedPercent = constants.DefaultScanSpeedPercent
   343  	}
   344  	filterLines := configParams.ScanExclusionList
   345  	if len(scanExcludeList) > 0 {
   346  		filterLines = scanExcludeList
   347  	}
   348  	if len(filterLines) < 1 {
   349  		filterLines = constants.ScanExcludeList
   350  	}
   351  	var err error
   352  	configuration.ScanFilter, err = filter.New(filterLines)
   353  	if err != nil {
   354  		fmt.Fprintf(os.Stderr, "Unable to set initial scan exclusions: %s\n",
   355  			err)
   356  		os.Exit(1)
   357  	}
   358  	configuration.FsScanContext = fsrateio.NewReaderContext(bytesPerSecond,
   359  		blocksPerSecond, uint64(configParams.ScanSpeedPercent))
   360  	defaultSpeed := configuration.FsScanContext.GetContext().SpeedPercent()
   361  	if firstScan {
   362  		configuration.FsScanContext.GetContext().SetSpeedPercent(100)
   363  	}
   364  	if *showStats {
   365  		fmt.Println(configuration.FsScanContext)
   366  	}
   367  	var fsh scanner.FileSystemHistory
   368  	mainFunc := func(fsChannel <-chan *scanner.FileSystem,
   369  		disableScanner func(disableScanner bool)) {
   370  		networkReaderContext := rateio.NewReaderContext(
   371  			getCachedNetworkSpeed(netbenchFilename),
   372  			uint64(configParams.NetworkSpeedPercent), &rateio.ReadMeasurer{})
   373  		configuration.NetworkReaderContext = networkReaderContext
   374  		invalidateNextScanObjectCache := false
   375  		rpcdHtmlWriter :=
   376  			rpcd.Setup(&configuration, &fsh, objectsDir,
   377  				workingRootDir, networkReaderContext, netbenchFilename,
   378  				oldTriggersFilename, disableScanner,
   379  				func() {
   380  					invalidateNextScanObjectCache = true
   381  					fsh.UpdateObjectCacheOnly()
   382  				},
   383  				logger)
   384  		configMetricsDir, err := tricorder.RegisterDirectory("/config")
   385  		if err != nil {
   386  			fmt.Fprintf(os.Stderr,
   387  				"Unable to create /config metrics directory: %s\n",
   388  				err)
   389  			os.Exit(1)
   390  		}
   391  		configuration.RegisterMetrics(configMetricsDir)
   392  		if err != nil {
   393  			fmt.Fprintf(os.Stderr, "Unable to create config metrics: %s\n", err)
   394  			os.Exit(1)
   395  		}
   396  		httpd.AddHtmlWriter(rpcdHtmlWriter)
   397  		httpd.AddHtmlWriter(&fsh)
   398  		httpd.AddHtmlWriter(&configuration)
   399  		httpd.AddHtmlWriter(logger)
   400  		html.RegisterHtmlWriterForPattern("/dumpFileSystem",
   401  			"Scanned File System",
   402  			&DumpableFileSystemHistory{&fsh})
   403  		if err = httpd.StartServer(*portNum, logger); err != nil {
   404  			fmt.Fprintf(os.Stderr, "Unable to create http server: %s\n", err)
   405  			os.Exit(1)
   406  		}
   407  		fsh.Update(nil)
   408  		sighupChannel := make(chan os.Signal, 1)
   409  		signal.Notify(sighupChannel, syscall.SIGHUP)
   410  		sigtermChannel := make(chan os.Signal, 1)
   411  		signal.Notify(sigtermChannel, syscall.SIGTERM, syscall.SIGINT)
   412  		writePidfile()
   413  		for iter := 0; true; {
   414  			select {
   415  			case <-sighupChannel:
   416  				logger.Printf("Caught SIGHUP: re-execing with: %v\n", os.Args)
   417  				logger.Flush()
   418  				err = syscall.Exec(os.Args[0], os.Args, os.Environ())
   419  				if err != nil {
   420  					logger.Printf("Unable to Exec:%s: %s\n", os.Args[0], err)
   421  				}
   422  			case <-sigtermChannel:
   423  				logger.Printf("Caught SIGTERM: performing graceful cleanup\n")
   424  				logger.Flush()
   425  				gracefulCleanup()
   426  			case fs := <-fsChannel:
   427  				if *showStats {
   428  					fmt.Printf("Completed cycle: %d\n", iter)
   429  				}
   430  				if invalidateNextScanObjectCache {
   431  					fs.ScanObjectCache()
   432  					invalidateNextScanObjectCache = false
   433  				}
   434  				fsh.Update(fs)
   435  				iter++
   436  				runtime.GC() // An opportune time to take out the garbage.
   437  				if *showStats {
   438  					fmt.Print(&fsh) // Use pointer to silence go vet.
   439  					fmt.Print(fsh.FileSystem())
   440  					memstats.WriteMemoryStats(os.Stdout)
   441  					fmt.Println()
   442  				}
   443  				if firstScan {
   444  					configuration.FsScanContext.GetContext().SetSpeedPercent(
   445  						defaultSpeed)
   446  					firstScan = false
   447  					if *showStats {
   448  						fmt.Println(configuration.FsScanContext)
   449  					}
   450  				}
   451  			}
   452  		}
   453  	}
   454  	scanner.StartScanning(workingRootDir, objectsDir, &configuration, logger,
   455  		mainFunc)
   456  }