bosun.org@v0.0.0-20210513094433-e25bc3e69a1f/cmd/bosun/main.go (about)

     1  package main
     2  
     3  //go:generate go run ../../build/generate/generate.go
     4  
     5  import (
     6  	"flag"
     7  	"fmt"
     8  	"net/http"
     9  	"net/http/httptest"
    10  	_ "net/http/pprof"
    11  	"net/url"
    12  	"os"
    13  	"os/signal"
    14  	"path/filepath"
    15  	"strings"
    16  	"syscall"
    17  	"time"
    18  
    19  	version "bosun.org/_version"
    20  	"gopkg.in/fsnotify.v1"
    21  
    22  	"bosun.org/annotate/backend"
    23  	"bosun.org/cmd/bosun/conf"
    24  	"bosun.org/cmd/bosun/conf/rule"
    25  	"bosun.org/cmd/bosun/database"
    26  	"bosun.org/cmd/bosun/expr"
    27  	"bosun.org/cmd/bosun/ping"
    28  	"bosun.org/cmd/bosun/sched"
    29  	"bosun.org/cmd/bosun/web"
    30  	"bosun.org/collect"
    31  	"bosun.org/graphite"
    32  	"bosun.org/metadata"
    33  	"bosun.org/opentsdb"
    34  	"bosun.org/slog"
    35  	"bosun.org/util"
    36  	"github.com/facebookgo/httpcontrol"
    37  	elastic6 "github.com/olivere/elastic"
    38  	elastic7 "github.com/olivere/elastic/v7"
    39  	elastic2 "gopkg.in/olivere/elastic.v3"
    40  	elastic5 "gopkg.in/olivere/elastic.v5"
    41  )
    42  
    43  type bosunHttpTransport struct {
    44  	UserAgent string
    45  	http.RoundTripper
    46  }
    47  
    48  func (t *bosunHttpTransport) RoundTrip(req *http.Request) (*http.Response, error) {
    49  	if req.Header.Get("User-Agent") == "" {
    50  		req.Header.Add("User-Agent", t.UserAgent)
    51  	}
    52  	req.Header.Add("X-Bosun-Server", util.GetHostManager().GetHostName())
    53  	return t.RoundTripper.RoundTrip(req)
    54  }
    55  
    56  var startTime time.Time
    57  
    58  func init() {
    59  	startTime = time.Now().UTC()
    60  	client := &http.Client{
    61  		Transport: &bosunHttpTransport{
    62  			"Bosun/" + version.ShortVersion(),
    63  			&httpcontrol.Transport{
    64  				Proxy:          http.ProxyFromEnvironment,
    65  				RequestTimeout: time.Minute,
    66  				MaxTries:       3,
    67  			},
    68  		},
    69  	}
    70  	http.DefaultClient = client
    71  	opentsdb.DefaultClient = client
    72  	graphite.DefaultClient = client
    73  	collect.DefaultClient = &http.Client{
    74  		Transport: &bosunHttpTransport{
    75  			"Bosun/" + version.ShortVersion(),
    76  			&httpcontrol.Transport{
    77  				RequestTimeout: time.Minute,
    78  			},
    79  		},
    80  	}
    81  	sched.DefaultClient = &http.Client{
    82  		Transport: &bosunHttpTransport{
    83  			"Bosun/" + version.ShortVersion(),
    84  			&httpcontrol.Transport{
    85  				RequestTimeout: time.Second * 5,
    86  			},
    87  		},
    88  	}
    89  }
    90  
    // Command-line flags. Each one is registered on the default flag set at
    // package initialisation and only becomes meaningful once flag.Parse has run
    // (main calls it first thing).
    var (
    	flagConf     = flag.String("c", "bosun.toml", "system config file location")
    	flagTest     = flag.Bool("t", false, "test for valid config; exits with 0 on success, else 1")
    	flagWatch    = flag.Bool("w", false, "watch .go files below current directory and exit; also build typescript files on change")
    	flagReadonly = flag.Bool("r", false, "readonly-mode: don't write or relay any OpenTSDB metrics")
    	flagQuiet    = flag.Bool("q", false, "quiet-mode: don't send any notifications except from the rule test page")
    	flagNoChecks = flag.Bool("n", false, "no-checks: don't run the checks at the run interval")
    	flagDev      = flag.Bool("dev", false, "enable dev mode: use local resources; no syslog")
    	flagSkipLast = flag.Bool("skiplast", false, "skip loading last datapoints from and to redis: useful for speeding up bosun startup time during development")
    	flagVersion  = flag.Bool("version", false, "Prints the version and exits")
    )

    // mains is a list of hooks that main invokes, in order, right after the flags
    // have been parsed. It is used to hook up syslog on *nix systems.
    var mains []func()
   104  
   105  func main() {
   106  	flag.Parse()
   107  	if *flagVersion {
   108  		fmt.Println(version.GetVersionInfo("bosun"))
   109  		os.Exit(0)
   110  	}
   111  	for _, m := range mains {
   112  		m()
   113  	}
   114  	systemConf, err := conf.LoadSystemConfigFile(*flagConf)
   115  	if err != nil {
   116  		slog.Fatalf("couldn't read system configuration: %v", err)
   117  	}
   118  
   119  	util.InitHostManager(systemConf.Hostname, false)
   120  
   121  	// Check if ES version is set by getting configs on start-up.
   122  	// Because the current APIs don't return error so calling slog.Fatalf
   123  	// inside these functions (for multiple-es support).
   124  	systemConf.GetElasticContext()
   125  	systemConf.GetAnnotateElasticHosts()
   126  
   127  	sysProvider, err := systemConf.GetSystemConfProvider()
   128  	if err != nil {
   129  		slog.Fatal(err)
   130  	}
   131  	ruleConf, err := rule.ParseFile(sysProvider.GetRuleFilePath(), systemConf.EnabledBackends(), systemConf.GetRuleVars())
   132  	if err != nil {
   133  		slog.Fatalf("couldn't read rules: %v", err)
   134  	}
   135  	if *flagTest {
   136  		os.Exit(0)
   137  	}
   138  	var ruleProvider conf.RuleConfProvider = ruleConf
   139  
   140  	addrToSendTo := sysProvider.GetHTTPSListen()
   141  	proto := "https"
   142  	if addrToSendTo == "" {
   143  		addrToSendTo = sysProvider.GetHTTPListen()
   144  		proto = "http"
   145  	}
   146  	selfAddress := &url.URL{
   147  		Scheme: proto,
   148  		Host:   addrToSendTo,
   149  	}
   150  	if strings.HasPrefix(selfAddress.Host, ":") {
   151  		selfAddress.Host = "localhost" + selfAddress.Host
   152  	}
   153  
   154  	da, err := initDataAccess(sysProvider)
   155  	if err != nil {
   156  		slog.Fatal(err)
   157  	}
   158  	if sysProvider.GetMaxRenderedTemplateAge() != 0 {
   159  		go da.State().CleanupOldRenderedTemplates(time.Hour * 24 * time.Duration(sysProvider.GetMaxRenderedTemplateAge()))
   160  	}
   161  	var annotateBackend backend.Backend
   162  	if sysProvider.AnnotateEnabled() {
   163  		index := sysProvider.GetAnnotateIndex()
   164  		if index == "" {
   165  			index = "annotate"
   166  		}
   167  		config := sysProvider.GetAnnotateElasticHosts()
   168  		switch config.Version {
   169  		case expr.ESV2:
   170  			annotateBackend = backend.NewElastic2([]string(config.Hosts), config.SimpleClient, index, config.ClientOptionFuncs.([]elastic2.ClientOptionFunc))
   171  		case expr.ESV5:
   172  			annotateBackend = backend.NewElastic5([]string(config.Hosts), config.SimpleClient, index, config.ClientOptionFuncs.([]elastic5.ClientOptionFunc))
   173  		case expr.ESV6:
   174  			annotateBackend = backend.NewElastic6([]string(config.Hosts), config.SimpleClient, index, config.ClientOptionFuncs.([]elastic6.ClientOptionFunc))
   175  		case expr.ESV7:
   176  			annotateBackend = backend.NewElastic7([]string(config.Hosts), config.SimpleClient, index, config.ClientOptionFuncs.([]elastic7.ClientOptionFunc))
   177  		}
   178  		go func() {
   179  			for {
   180  				err := annotateBackend.InitBackend()
   181  				if err == nil {
   182  					return
   183  				}
   184  				slog.Warningf("could not initialize annotate backend, will try again: %v", err)
   185  				time.Sleep(time.Second * 30)
   186  			}
   187  		}()
   188  		web.AnnotateBackend = annotateBackend
   189  	}
   190  	if err := sched.Load(sysProvider, ruleProvider, da, annotateBackend, *flagSkipLast, *flagQuiet); err != nil {
   191  		slog.Fatal(err)
   192  	}
   193  	if err := metadata.InitF(false, func(k metadata.Metakey, v interface{}) error { return sched.DefaultSched.PutMetadata(k, v) }); err != nil {
   194  		slog.Fatal(err)
   195  	}
   196  	if sysProvider.GetTSDBHost() != "" {
   197  		relay := web.Relay(sysProvider.GetTSDBHost())
   198  		collect.DirectHandler = relay
   199  		if err := collect.Init(selfAddress, "bosun"); err != nil {
   200  			slog.Fatal(err)
   201  		}
   202  		tsdbHost := &url.URL{
   203  			Scheme: "http",
   204  			Host:   sysProvider.GetTSDBHost(),
   205  		}
   206  		if *flagReadonly {
   207  			rp := util.NewSingleHostProxy(tsdbHost)
   208  			ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
   209  				if r.URL.Path == "/api/put" {
   210  					w.WriteHeader(204)
   211  					return
   212  				}
   213  				rp.ServeHTTP(w, r)
   214  			}))
   215  			slog.Infoln("readonly relay at", ts.URL, "to", tsdbHost)
   216  			tsdbHost, _ = url.Parse(ts.URL)
   217  			sysProvider.SetTSDBHost(tsdbHost.Host)
   218  		}
   219  	}
   220  	if systemConf.GetPing() {
   221  		go ping.PingHosts(sched.DefaultSched.Search, systemConf.GetPingDuration())
   222  	}
   223  	if sysProvider.GetInternetProxy() != "" {
   224  		web.InternetProxy, err = url.Parse(sysProvider.GetInternetProxy())
   225  		if err != nil {
   226  			slog.Fatalf("InternetProxy error: %s", err)
   227  		}
   228  	}
   229  	var cmdHook conf.SaveHook
   230  	if hookPath := sysProvider.GetCommandHookPath(); hookPath != "" {
   231  		cmdHook, err = conf.MakeSaveCommandHook(hookPath)
   232  		if err != nil {
   233  			slog.Fatal(err)
   234  		}
   235  		ruleProvider.SetSaveHook(cmdHook)
   236  	}
   237  	var reload func() error
   238  	reloading := make(chan bool, 1) // a lock that we can give up acquiring
   239  	reload = func() error {
   240  		select {
   241  		case reloading <- true:
   242  			// Got lock
   243  		default:
   244  			return fmt.Errorf("not reloading, reload in progress")
   245  		}
   246  		defer func() {
   247  			<-reloading
   248  		}()
   249  		newConf, err := rule.ParseFile(sysProvider.GetRuleFilePath(), sysProvider.EnabledBackends(), sysProvider.GetRuleVars())
   250  		if err != nil {
   251  			return err
   252  		}
   253  		newConf.SetSaveHook(cmdHook)
   254  		newConf.SetReload(reload)
   255  		oldSched := sched.DefaultSched
   256  		oldSearch := oldSched.Search
   257  		sched.Close(true)
   258  		sched.Reset()
   259  		newSched := sched.DefaultSched
   260  		newSched.Search = oldSearch
   261  		slog.Infoln("schedule shutdown, loading new schedule")
   262  
   263  		// Load does not set the DataAccess or Search if it is already set
   264  		if err := sched.Load(sysProvider, newConf, da, annotateBackend, *flagSkipLast, *flagQuiet); err != nil {
   265  			slog.Fatal(err)
   266  		}
   267  		web.ResetSchedule() // Signal web to point to the new DefaultSchedule
   268  		go func() {
   269  			slog.Infoln("running new schedule")
   270  			if !*flagNoChecks {
   271  				sched.Run()
   272  			}
   273  		}()
   274  		slog.Infoln("config reload complete")
   275  		return nil
   276  	}
   277  
   278  	ruleProvider.SetReload(reload)
   279  
   280  	go func() {
   281  		slog.Fatal(web.Listen(sysProvider.GetHTTPListen(), sysProvider.GetHTTPSListen(),
   282  			sysProvider.GetTLSCertFile(), sysProvider.GetTLSKeyFile(), *flagDev,
   283  			sysProvider.GetTSDBHost(), reload, sysProvider.GetAuthConf(), startTime))
   284  	}()
   285  	go func() {
   286  		if !*flagNoChecks {
   287  			sched.Run()
   288  		}
   289  	}()
   290  
   291  	go func() {
   292  		sc := make(chan os.Signal, 1)
   293  		signal.Notify(sc, os.Interrupt, syscall.SIGTERM)
   294  		killing := false
   295  		for range sc {
   296  			if killing {
   297  				slog.Infoln("Second interrupt: exiting")
   298  				os.Exit(1)
   299  			}
   300  			killing = true
   301  			go func() {
   302  				slog.Infoln("Interrupt: closing down...")
   303  				sched.Close(false)
   304  				slog.Infoln("done")
   305  				os.Exit(0)
   306  			}()
   307  		}
   308  	}()
   309  
   310  	if *flagWatch {
   311  		watch(".", "*.go", quit)
   312  		watch(filepath.Join("web", "static", "templates"), "*.html", web.RunEsc)
   313  		base := filepath.Join("web", "static", "js")
   314  		watch(base, "*.ts", web.RunTsc)
   315  	}
   316  	select {}
   317  }
   318  
   319  func quit() {
   320  	slog.Error("Exiting")
   321  	os.Exit(0)
   322  }
   323  
   324  func initDataAccess(systemConf conf.SystemConfProvider) (database.DataAccess, error) {
   325  	var da database.DataAccess
   326  	if len(systemConf.GetRedisHost()) != 0 {
   327  		da = database.NewDataAccess(
   328  			systemConf.GetRedisHost(),
   329  			systemConf.IsRedisClientSetName(),
   330  			systemConf.GetRedisMasterName(),
   331  			systemConf.GetRedisDb(),
   332  			systemConf.GetRedisPassword(),
   333  		)
   334  	} else {
   335  		_, err := database.StartLedis(
   336  			systemConf.GetLedisDir(),
   337  			systemConf.GetLedisBindAddr(),
   338  		)
   339  		if err != nil {
   340  			return nil, err
   341  		}
   342  		da = database.NewDataAccess(
   343  			[]string{systemConf.GetLedisBindAddr()},
   344  			false,
   345  			"",
   346  			0,
   347  			"",
   348  		)
   349  	}
   350  	err := da.Migrate()
   351  	return da, err
   352  }
   353  
   354  func watch(root, pattern string, f func()) {
   355  	watcher, err := fsnotify.NewWatcher()
   356  	if err != nil {
   357  		slog.Fatal(err)
   358  	}
   359  	filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
   360  		if matched, err := filepath.Match(pattern, info.Name()); err != nil {
   361  			slog.Fatal(err)
   362  		} else if !matched {
   363  			return nil
   364  		}
   365  		err = watcher.Add(path)
   366  		if err != nil {
   367  			slog.Fatal(err)
   368  		}
   369  		return nil
   370  	})
   371  	slog.Infoln("watching", pattern, "in", root)
   372  	wait := time.Now()
   373  	go func() {
   374  		for {
   375  			select {
   376  			case event := <-watcher.Events:
   377  				if wait.After(time.Now()) {
   378  					continue
   379  				}
   380  				if event.Op&fsnotify.Write == fsnotify.Write {
   381  					f()
   382  					wait = time.Now().Add(time.Second * 2)
   383  				}
   384  			case err := <-watcher.Errors:
   385  				slog.Errorln("error:", err)
   386  			}
   387  		}
   388  	}()
   389  }