github.com/Cloud-Foundations/Dominator@v0.3.4/cmd/subd/main.go (about)

     1  package main
     2  
     3  import (
     4  	"flag"
     5  	"fmt"
     6  	"io"
     7  	"os"
     8  	"os/signal"
     9  	"path"
    10  	"runtime"
    11  	"strconv"
    12  	"strings"
    13  	"syscall"
    14  
    15  	"github.com/Cloud-Foundations/Dominator/lib/constants"
    16  	"github.com/Cloud-Foundations/Dominator/lib/cpulimiter"
    17  	"github.com/Cloud-Foundations/Dominator/lib/filter"
    18  	"github.com/Cloud-Foundations/Dominator/lib/flags/loadflags"
    19  	"github.com/Cloud-Foundations/Dominator/lib/flagutil"
    20  	"github.com/Cloud-Foundations/Dominator/lib/format"
    21  	"github.com/Cloud-Foundations/Dominator/lib/fsbench"
    22  	"github.com/Cloud-Foundations/Dominator/lib/fsrateio"
    23  	"github.com/Cloud-Foundations/Dominator/lib/goroutine"
    24  	"github.com/Cloud-Foundations/Dominator/lib/html"
    25  	"github.com/Cloud-Foundations/Dominator/lib/log/serverlogger"
    26  	"github.com/Cloud-Foundations/Dominator/lib/memstats"
    27  	"github.com/Cloud-Foundations/Dominator/lib/netspeed"
    28  	"github.com/Cloud-Foundations/Dominator/lib/rateio"
    29  	"github.com/Cloud-Foundations/Dominator/lib/srpc"
    30  	"github.com/Cloud-Foundations/Dominator/lib/srpc/setupserver"
    31  	"github.com/Cloud-Foundations/Dominator/lib/wsyscall"
    32  	"github.com/Cloud-Foundations/Dominator/proto/sub"
    33  	"github.com/Cloud-Foundations/Dominator/sub/httpd"
    34  	"github.com/Cloud-Foundations/Dominator/sub/rpcd"
    35  	"github.com/Cloud-Foundations/Dominator/sub/scanner"
    36  	"github.com/Cloud-Foundations/tricorder/go/tricorder"
    37  	"github.com/Cloud-Foundations/tricorder/go/tricorder/units"
    38  )
    39  
    40  var (
    41  	configDirectory = flag.String("configDirectory", "/etc/subd/conf.d",
    42  		"Directory of optional JSON configuration files")
    43  	defaultCpuPercent = flag.Uint("defaultCpuPercent", 0,
    44  		"CPU speed as percentage of capacity (default 50)")
    45  	defaultNetworkSpeedPercent = flag.Uint("defaultNetworkSpeedPercent", 0,
    46  		"Network speed as percentage of capacity (default 10)")
    47  	defaultScanSpeedPercent = flag.Uint("defaultScanSpeedPercent", 0,
    48  		"Scan speed as percentage of capacity (default 2)")
    49  	disruptionManager = flag.String("disruptionManager", "",
    50  		"Path to DisruptionManager tool")
    51  	maxThreads = flag.Uint("maxThreads", 1,
    52  		"Maximum number of parallel OS threads to use")
    53  	noteGenerator = flag.String("noteGenerator", "",
    54  		"Optional command to run (usually after succesful update) to generate a short note")
    55  	permitInsecureMode = flag.Bool("permitInsecureMode", false,
    56  		"If true, run in insecure mode. This gives remote root access to all")
    57  	pidfile = flag.String("pidfile", "/var/run/subd.pid",
    58  		"Name of file to write my PID to")
    59  	portNum = flag.Uint("portNum", constants.SubPortNumber,
    60  		"Port number to allocate and listen on for HTTP/RPC")
    61  	rootDeviceBytesPerSecond flagutil.Size
    62  	rootDir                  = flag.String("rootDir", "/",
    63  		"Name of root of directory tree to manage")
    64  	scanExcludeList flagutil.StringList
    65  	showStats       = flag.Bool("showStats", false,
    66  		"If true, show statistics after each cycle")
    67  	subdDir = flag.String("subdDir", ".subd",
    68  		"Name of subd private directory, relative to rootDir. This must be on the same file-system as rootDir")
    69  	testExternallyPatchable = flag.Bool("testExternallyPatchable", false,
    70  		"If true, test if externally patchable and exit=0 if so or exit=1 if not")
    71  )
    72  
    73  func init() {
    74  	// Ensure the main goroutine runs on the startup thread.
    75  	runtime.LockOSThread()
    76  	flag.Var(&rootDeviceBytesPerSecond, "rootDeviceBytesPerSecond",
    77  		"Fallback root device speed (default 0)")
    78  	flag.Var(&scanExcludeList, "scanExcludeList",
    79  		`Comma separated list of patterns to exclude from scanning (default `+strings.Join(constants.ScanExcludeList, ",")+`")`)
    80  }
    81  
    82  func sanityCheck() bool {
    83  	r_devnum, err := fsbench.GetDevnumForFile(*rootDir)
    84  	if err != nil {
    85  		fmt.Fprintf(os.Stderr, "Unable to get device number for: %s: %s\n",
    86  			*rootDir, err)
    87  		return false
    88  	}
    89  	subdDirPathname := path.Join(*rootDir, *subdDir)
    90  	s_devnum, err := fsbench.GetDevnumForFile(subdDirPathname)
    91  	if err != nil {
    92  		fmt.Fprintf(os.Stderr, "Unable to get device number for: %s: %s\n",
    93  			subdDirPathname, err)
    94  		return false
    95  	}
    96  	if r_devnum != s_devnum {
    97  		fmt.Fprintf(os.Stderr,
    98  			"rootDir and subdDir must be on the same file-system\n")
    99  		return false
   100  	}
   101  	return true
   102  }
   103  
   104  func createDirectory(dirname string) bool {
   105  	if err := os.MkdirAll(dirname, 0750); err != nil {
   106  		fmt.Fprintf(os.Stderr, "Unable to create directory: %s: %s\n",
   107  			dirname, err)
   108  		return false
   109  	}
   110  	return true
   111  }
   112  
   113  func mountTmpfs(dirname string) bool {
   114  	var statfs syscall.Statfs_t
   115  	if err := syscall.Statfs(dirname, &statfs); err != nil {
   116  		fmt.Fprintf(os.Stderr, "Unable to create Statfs: %s: %s\n",
   117  			dirname, err)
   118  		return false
   119  	}
   120  	if statfs.Type != 0x01021994 {
   121  		err := wsyscall.Mount("none", dirname, "tmpfs", 0,
   122  			"size=65536,mode=0750")
   123  		if err == nil {
   124  			fmt.Printf("Mounted tmpfs on: %s\n", dirname)
   125  		} else {
   126  			fmt.Fprintf(os.Stderr, "Unable to mount tmpfs on: %s: %s\n",
   127  				dirname, err)
   128  			return false
   129  		}
   130  	}
   131  	return true
   132  }
   133  
   134  func unshareAndBind(workingRootDir string) error {
   135  	if err := wsyscall.UnshareMountNamespace(); err != nil {
   136  		return fmt.Errorf("unable to unshare mount namesace: %s\n", err)
   137  	}
   138  	syscall.Unmount(workingRootDir, 0)
   139  	err := wsyscall.Mount(*rootDir, workingRootDir, "", wsyscall.MS_BIND, "")
   140  	if err != nil {
   141  		return fmt.Errorf("unable to bind mount %s to %s: %s\n",
   142  			*rootDir, workingRootDir, err)
   143  	}
   144  	return nil
   145  }
   146  
   147  func getCachedFsSpeed(workingRootDir string,
   148  	cacheDirname string) (bytesPerSecond, blocksPerSecond uint64,
   149  	computed, ok bool) {
   150  	bytesPerSecond = 0
   151  	blocksPerSecond = 0
   152  	devnum, err := fsbench.GetDevnumForFile(workingRootDir)
   153  	if err != nil {
   154  		fmt.Fprintf(os.Stderr, "Unable to get device number for: %s: %s\n",
   155  			workingRootDir, err)
   156  		return 0, 0, false, false
   157  	}
   158  	fsbenchDir := path.Join(cacheDirname, "fsbench")
   159  	if !createDirectory(fsbenchDir) {
   160  		return 0, 0, false, false
   161  	}
   162  	cacheFilename := path.Join(fsbenchDir, strconv.FormatUint(devnum, 16))
   163  	file, err := os.Open(cacheFilename)
   164  	if err == nil {
   165  		n, err := fmt.Fscanf(file, "%d %d", &bytesPerSecond, &blocksPerSecond)
   166  		file.Close()
   167  		if n == 2 || err == nil {
   168  			return bytesPerSecond, blocksPerSecond, false, true
   169  		}
   170  	}
   171  	bytesPerSecond, blocksPerSecond, err = fsbench.GetReadSpeed(workingRootDir)
   172  	if err != nil {
   173  		fmt.Fprintf(os.Stderr, "Unable to measure read speed: %s\n", err)
   174  		return 0, 0, true, false
   175  	}
   176  	file, err = os.Create(cacheFilename)
   177  	if err != nil {
   178  		fmt.Fprintf(os.Stderr, "Unable to open: %s for write: %s\n",
   179  			cacheFilename, err)
   180  		return 0, 0, true, false
   181  	}
   182  	fmt.Fprintf(file, "%d %d\n", bytesPerSecond, blocksPerSecond)
   183  	file.Close()
   184  	return bytesPerSecond, blocksPerSecond, true, true
   185  }
   186  
   187  func publishFsSpeed(bytesPerSecond, blocksPerSecond uint64) {
   188  	tricorder.RegisterMetric("/root-read-speed", &bytesPerSecond,
   189  		units.BytePerSecond, "read speed of root file-system media")
   190  	tricorder.RegisterMetric("/root-block-read-speed", &blocksPerSecond,
   191  		units.None, "read speed of root file-system media in blocks/second")
   192  }
   193  
   194  func getCachedNetworkSpeed(cacheFilename string) uint64 {
   195  	if speed, ok := netspeed.GetSpeedToHost(""); ok {
   196  		return speed
   197  	}
   198  	file, err := os.Open(cacheFilename)
   199  	if err != nil {
   200  		return 0
   201  	}
   202  	defer file.Close()
   203  	var bytesPerSecond uint64
   204  	n, err := fmt.Fscanf(file, "%d", &bytesPerSecond)
   205  	if n == 1 || err == nil {
   206  		return bytesPerSecond
   207  	}
   208  	return 0
   209  }
   210  
// DumpableFileSystemHistory wraps a *scanner.FileSystemHistory so that its
// WriteHtml method can dump a listing of the history's current file-system.
// In main() it is registered as the HTML writer for "/dumpFileSystem".
type DumpableFileSystemHistory struct {
	fsh *scanner.FileSystemHistory // The history whose file-system is listed.
}
   214  
   215  func (fsh *DumpableFileSystemHistory) WriteHtml(writer io.Writer) {
   216  	fs := fsh.fsh.FileSystem()
   217  	if fs == nil {
   218  		return
   219  	}
   220  	fmt.Fprintln(writer, "<pre>")
   221  	fs.List(writer)
   222  	fmt.Fprintln(writer, "</pre>")
   223  }
   224  
   225  func gracefulCleanup() {
   226  	os.Remove(*pidfile)
   227  	os.Exit(1)
   228  }
   229  
   230  func writePidfile() {
   231  	file, err := os.Create(*pidfile)
   232  	if err != nil {
   233  		fmt.Fprintln(os.Stderr, err.Error())
   234  		os.Exit(1)
   235  	}
   236  	defer file.Close()
   237  	fmt.Fprintln(file, os.Getpid())
   238  }
   239  
   240  func main() {
   241  	// Ensure the startup thread is reserved for the main function.
   242  	runtime.LockOSThread()
   243  	if err := loadflags.LoadForDaemon("subd"); err != nil {
   244  		fmt.Fprintln(os.Stderr, err)
   245  		os.Exit(1)
   246  	}
   247  	flag.Parse()
   248  	if *testExternallyPatchable {
   249  		runTestAndExit(checkExternallyPatchable)
   250  	}
   251  	if err := wsyscall.SetMyPriority(1); err != nil {
   252  		fmt.Fprintln(os.Stderr, err)
   253  		os.Exit(1)
   254  	}
   255  	tricorder.RegisterFlags()
   256  	subdDirPathname := path.Join(*rootDir, *subdDir)
   257  	workingRootDir := path.Join(subdDirPathname, "root")
   258  	objectsDir := path.Join(workingRootDir, *subdDir, "objects")
   259  	tmpDir := path.Join(subdDirPathname, "tmp")
   260  	netbenchFilename := path.Join(subdDirPathname, "netbench")
   261  	oldTriggersFilename := path.Join(subdDirPathname, "triggers.previous")
   262  	if !createDirectory(workingRootDir) {
   263  		os.Exit(1)
   264  	}
   265  	if !sanityCheck() {
   266  		os.Exit(1)
   267  	}
   268  	if !createDirectory(tmpDir) {
   269  		os.Exit(1)
   270  	}
   271  	if !mountTmpfs(tmpDir) {
   272  		os.Exit(1)
   273  	}
   274  	// Create a goroutine for performing updates.
   275  	workdirGoroutine := goroutine.New()
   276  	var err error
   277  	workdirGoroutine.Run(func() { err = unshareAndBind(workingRootDir) })
   278  	if err != nil {
   279  		fmt.Fprintln(os.Stderr, err)
   280  		os.Exit(1)
   281  	}
   282  	runtime.GOMAXPROCS(int(*maxThreads))
   283  	logger := serverlogger.New("")
   284  	srpc.SetDefaultLogger(logger)
   285  	params := setupserver.Params{Logger: logger}
   286  	if err := setupserver.SetupTlsWithParams(params); err != nil {
   287  		if *permitInsecureMode {
   288  			logger.Println(err)
   289  		} else {
   290  			logger.Fatalln(err)
   291  		}
   292  	}
   293  	bytesPerSecond, blocksPerSecond, firstScan, ok := getCachedFsSpeed(
   294  		workingRootDir, tmpDir)
   295  	if !ok {
   296  		if rootDeviceBytesPerSecond < 1<<20 {
   297  			os.Exit(1)
   298  		}
   299  		bytesPerSecond = uint64(rootDeviceBytesPerSecond)
   300  		blocksPerSecond = bytesPerSecond >> 9
   301  		logger.Printf("Falling back to -rootDeviceBytesPerSecond option: %s\n",
   302  			format.FormatBytes(bytesPerSecond))
   303  	}
   304  	publishFsSpeed(bytesPerSecond, blocksPerSecond)
   305  	configParams := sub.Configuration{}
   306  	loadConfiguration(*configDirectory, &configParams, logger)
   307  	// Command-line flags override file configuration.
   308  	if *defaultCpuPercent > 0 {
   309  		configParams.CpuPercent = *defaultCpuPercent
   310  	}
   311  	if *defaultNetworkSpeedPercent > 0 {
   312  		configParams.NetworkSpeedPercent = *defaultNetworkSpeedPercent
   313  	}
   314  	if *defaultScanSpeedPercent > 0 {
   315  		configParams.ScanSpeedPercent = *defaultScanSpeedPercent
   316  	}
   317  	var configuration scanner.Configuration
   318  	configuration.CpuLimiter = cpulimiter.New(100)
   319  	configuration.DefaultCpuPercent = configParams.CpuPercent
   320  	// Apply built-in defaults if nothing specified.
   321  	if configuration.DefaultCpuPercent < 1 {
   322  		configuration.DefaultCpuPercent = constants.DefaultCpuPercent
   323  		go adjustVcpuLimit(&configuration.DefaultCpuPercent, logger)
   324  	}
   325  	if configParams.NetworkSpeedPercent < 1 {
   326  		configParams.NetworkSpeedPercent = constants.DefaultNetworkSpeedPercent
   327  	}
   328  	if configParams.ScanSpeedPercent < 1 {
   329  		configParams.ScanSpeedPercent = constants.DefaultScanSpeedPercent
   330  	}
   331  	filterLines := configParams.ScanExclusionList
   332  	if len(scanExcludeList) > 0 {
   333  		filterLines = scanExcludeList
   334  	}
   335  	if len(filterLines) < 1 {
   336  		filterLines = constants.ScanExcludeList
   337  	}
   338  	configuration.ScanFilter, err = filter.New(filterLines)
   339  	if err != nil {
   340  		fmt.Fprintf(os.Stderr, "Unable to set initial scan exclusions: %s\n",
   341  			err)
   342  		os.Exit(1)
   343  	}
   344  	configuration.FsScanContext = fsrateio.NewReaderContext(bytesPerSecond,
   345  		blocksPerSecond, uint64(configParams.ScanSpeedPercent))
   346  	defaultSpeed := configuration.FsScanContext.GetContext().SpeedPercent()
   347  	if firstScan {
   348  		configuration.FsScanContext.GetContext().SetSpeedPercent(100)
   349  	}
   350  	if *showStats {
   351  		fmt.Println(configuration.FsScanContext)
   352  	}
   353  	var fsh scanner.FileSystemHistory
   354  	mainFunc := func(fsChannel <-chan *scanner.FileSystem,
   355  		disableScanner func(disableScanner bool)) {
   356  		networkReaderContext := rateio.NewReaderContext(
   357  			getCachedNetworkSpeed(netbenchFilename),
   358  			uint64(configParams.NetworkSpeedPercent), &rateio.ReadMeasurer{})
   359  		configuration.NetworkReaderContext = networkReaderContext
   360  		invalidateNextScanObjectCache := false
   361  		rescanFunc := func() {
   362  			invalidateNextScanObjectCache = true
   363  			if err := fsh.UpdateObjectCacheOnly(); err != nil {
   364  				logger.Printf("Error updating object cache: %s\n", err)
   365  			}
   366  		}
   367  		rpcdHtmlWriter := rpcd.Setup(
   368  			rpcd.Config{
   369  				DisruptionManager:        *disruptionManager,
   370  				NetworkBenchmarkFilename: netbenchFilename,
   371  				NoteGeneratorCommand:     *noteGenerator,
   372  				ObjectsDirectoryName:     objectsDir,
   373  				OldTriggersFilename:      oldTriggersFilename,
   374  				RootDirectoryName:        workingRootDir,
   375  				SubConfiguration:         configParams,
   376  			},
   377  			rpcd.Params{
   378  				DisableScannerFunction:    disableScanner,
   379  				FileSystemHistory:         &fsh,
   380  				Logger:                    logger,
   381  				NetworkReaderContext:      networkReaderContext,
   382  				RescanObjectCacheFunction: rescanFunc,
   383  				ScannerConfiguration:      &configuration,
   384  				SubdDirectory:             subdDirPathname,
   385  				WorkdirGoroutine:          workdirGoroutine,
   386  			})
   387  		configMetricsDir, err := tricorder.RegisterDirectory("/config")
   388  		if err != nil {
   389  			fmt.Fprintf(os.Stderr,
   390  				"Unable to create /config metrics directory: %s\n",
   391  				err)
   392  			os.Exit(1)
   393  		}
   394  		configuration.RegisterMetrics(configMetricsDir)
   395  		if err != nil {
   396  			fmt.Fprintf(os.Stderr, "Unable to create config metrics: %s\n", err)
   397  			os.Exit(1)
   398  		}
   399  		httpd.AddHtmlWriter(rpcdHtmlWriter)
   400  		httpd.AddHtmlWriter(&fsh)
   401  		httpd.AddHtmlWriter(&configuration)
   402  		httpd.AddHtmlWriter(logger)
   403  		html.RegisterHtmlWriterForPattern("/dumpFileSystem",
   404  			"Scanned File System",
   405  			&DumpableFileSystemHistory{&fsh})
   406  		if err = httpd.StartServer(*portNum, logger); err != nil {
   407  			fmt.Fprintf(os.Stderr, "Unable to create http server: %s\n", err)
   408  			os.Exit(1)
   409  		}
   410  		fsh.Update(nil)
   411  		sighupChannel := make(chan os.Signal, 1)
   412  		signal.Notify(sighupChannel, syscall.SIGHUP)
   413  		sigtermChannel := make(chan os.Signal, 1)
   414  		signal.Notify(sigtermChannel, syscall.SIGTERM, syscall.SIGINT)
   415  		writePidfile()
   416  		for iter := 0; true; {
   417  			select {
   418  			case <-sighupChannel:
   419  				logger.Printf("Caught SIGHUP: re-execing with: %v\n", os.Args)
   420  				logger.Flush()
   421  				err = syscall.Exec(os.Args[0], os.Args, os.Environ())
   422  				if err != nil {
   423  					logger.Printf("Unable to Exec:%s: %s\n", os.Args[0], err)
   424  				}
   425  			case <-sigtermChannel:
   426  				logger.Printf("Caught SIGTERM: performing graceful cleanup\n")
   427  				logger.Flush()
   428  				gracefulCleanup()
   429  			case fs := <-fsChannel:
   430  				if *showStats {
   431  					fmt.Printf("Completed cycle: %d\n", iter)
   432  				}
   433  				if invalidateNextScanObjectCache {
   434  					workdirGoroutine.Run(func() {
   435  						if err := fs.ScanObjectCache(); err != nil {
   436  							logger.Printf("Error scanning object cache: %s\n",
   437  								err)
   438  						}
   439  					})
   440  					invalidateNextScanObjectCache = false
   441  				}
   442  				oldGenerationCount := fsh.GenerationCount()
   443  				oldScanCount := fsh.ScanCount()
   444  				fsh.Update(fs)
   445  				iter++
   446  				generationCount := fsh.GenerationCount()
   447  				scanCount := fsh.ScanCount()
   448  				if generationCount != oldGenerationCount {
   449  					logger.Printf("Generation count: %d, scan count: %d\n",
   450  						generationCount, scanCount)
   451  				} else if scanCount != oldScanCount {
   452  					logger.Debugf(0, "Generation count: %d, scan count: %d\n",
   453  						generationCount, scanCount)
   454  				}
   455  				runtime.GC() // An opportune time to take out the garbage.
   456  				if *showStats {
   457  					fmt.Print(&fsh) // Use pointer to silence go vet.
   458  					fmt.Print(fsh.FileSystem())
   459  					memstats.WriteMemoryStats(os.Stdout)
   460  					fmt.Println()
   461  				}
   462  				if firstScan {
   463  					configuration.FsScanContext.GetContext().SetSpeedPercent(
   464  						defaultSpeed)
   465  					firstScan = false
   466  					if *showStats {
   467  						fmt.Println(configuration.FsScanContext)
   468  					}
   469  				}
   470  			}
   471  		}
   472  	}
   473  	// Create a goroutine prior to mutating the startup thread to ensure that
   474  	// new goroutines are started from a "clean" thread.
   475  	mainGoroutine := goroutine.New()
   476  	// Setup environment for scanning.
   477  	if err := unshareAndBind(workingRootDir); err != nil {
   478  		logger.Fatalln(err)
   479  	}
   480  	if !createDirectory(objectsDir) { // Must be done after unshareAndBind().
   481  		os.Exit(1)
   482  	}
   483  	scanner.StartScanning(workingRootDir, objectsDir, &configuration, logger,
   484  		func(fsChannel <-chan *scanner.FileSystem,
   485  			disableScanner func(disableScanner bool)) {
   486  			mainGoroutine.Start(func() { mainFunc(fsChannel, disableScanner) })
   487  		})
   488  }