bosun.org@v0.0.0-20210513094433-e25bc3e69a1f/cmd/scollector/main.go (about)

     1  package main
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/json"
     6  	_ "expvar"
     7  	"flag"
     8  	"fmt"
     9  	"io/ioutil"
    10  	"log"
    11  	"net/http"
    12  	_ "net/http/pprof"
    13  	"net/url"
    14  	"os"
    15  	"os/signal"
    16  	"path/filepath"
    17  	"runtime"
    18  	"strconv"
    19  	"strings"
    20  	"time"
    21  
    22  	version "bosun.org/_version"
    23  	"bosun.org/cmd/scollector/collectors"
    24  	"bosun.org/cmd/scollector/conf"
    25  	"bosun.org/collect"
    26  	"bosun.org/metadata"
    27  	"bosun.org/opentsdb"
    28  	"bosun.org/slog"
    29  	"bosun.org/snmp"
    30  	"bosun.org/util"
    31  	"github.com/BurntSushi/toml"
    32  	"github.com/facebookgo/httpcontrol"
    33  )
    34  
// Command-line flags. Several of them override the corresponding setting from
// the configuration file (see the individual usage strings).
var (
	flagHost            = flag.String("h", "", "OpenTSDB or Bosun host to send data. Overrides Host in conf file.")
	flagFilter          = flag.String("f", "", "Filters collectors matching these terms, separated by comma. Overrides Filter in conf file.")
	flagList            = flag.Bool("l", false, "List available collectors.")
	flagPrint           = flag.Bool("p", false, "Print to screen instead of sending to a host")
	flagBatchSize       = flag.Int("b", 0, "OpenTSDB batch size. Default is 500.")
	flagFake            = flag.Int("fake", 0, "Generates X fake data points on the test.fake metric per second.")
	flagDebug           = flag.Bool("d", false, "Enables debug output.")
	flagDisableMetadata = flag.Bool("m", false, "Disable sending of metadata.")
	flagVersion         = flag.Bool("version", false, "Prints the version and exits.")
	flagConf            = flag.String("conf", "", "Location of configuration file. Defaults to scollector.toml in directory of the scollector executable.")
	flagToToml          = flag.String("totoml", "", "Location of destination toml file to convert. Reads from value of -conf.")
	flagNtlm            = flag.Bool("useNtlm", false, "Specifies to use NTLM authentication.")

	// mains holds extra startup hooks; main calls each of them, in order,
	// after the -totoml/-version handling and before the configuration is
	// read. NOTE(review): presumably populated by other files of this
	// package (e.g. platform-specific ones) — not visible here.
	mains []func()
)
    51  
    52  type scollectorHTTPTransport struct {
    53  	UserAgent string
    54  	http.RoundTripper
    55  }
    56  
    57  func (t *scollectorHTTPTransport) RoundTrip(req *http.Request) (*http.Response, error) {
    58  	if req.Header.Get("User-Agent") == "" {
    59  		req.Header.Add("User-Agent", t.UserAgent)
    60  	}
    61  	return t.RoundTripper.RoundTrip(req)
    62  }
    63  
    64  func main() {
    65  	flag.Parse()
    66  	if *flagToToml != "" {
    67  		toToml(*flagToToml)
    68  		fmt.Println("toml conversion complete; remove all empty values by hand (empty strings, 0)")
    69  		return
    70  	}
    71  	if *flagPrint || *flagDebug {
    72  		slog.Set(&slog.StdLog{Log: log.New(os.Stdout, "", log.LstdFlags)})
    73  	}
    74  	if *flagVersion {
    75  		fmt.Println(version.GetVersionInfo("scollector"))
    76  		os.Exit(0)
    77  	}
    78  	for _, m := range mains {
    79  		m()
    80  	}
    81  	conf := readConf()
    82  	ua := "Scollector/" + version.ShortVersion()
    83  	if conf.UserAgentMessage != "" {
    84  		ua += fmt.Sprintf(" (%s)", conf.UserAgentMessage)
    85  	}
    86  	if conf.AuthToken != "" {
    87  		collect.AuthToken = conf.AuthToken
    88  		metadata.AuthToken = conf.AuthToken
    89  	}
    90  	client := &http.Client{
    91  		Transport: &scollectorHTTPTransport{
    92  			ua,
    93  			&httpcontrol.Transport{
    94  				RequestTimeout: time.Minute,
    95  			},
    96  		},
    97  	}
    98  	http.DefaultClient = client
    99  	collect.DefaultClient = client
   100  	if *flagHost != "" {
   101  		conf.Host = *flagHost
   102  	}
   103  	if *flagNtlm {
   104  		conf.UseNtlm = *flagNtlm
   105  	}
   106  	if *flagFilter != "" {
   107  		conf.Filter = strings.Split(*flagFilter, ",")
   108  	}
   109  	if !conf.Tags.Valid() {
   110  		slog.Fatalf("invalid tags: %v", conf.Tags)
   111  	} else if conf.Tags["host"] != "" {
   112  		slog.Fatalf("host not supported in custom tags, use Hostname instead")
   113  	}
   114  	if conf.PProf != "" {
   115  		go func() {
   116  			slog.Infof("Starting pprof at http://%s/debug/pprof/", conf.PProf)
   117  			slog.Fatal(http.ListenAndServe(conf.PProf, nil))
   118  		}()
   119  	}
   120  	collectors.AddTags = conf.Tags
   121  
   122  	util.InitHostManager(conf.Hostname, conf.FullHost)
   123  
   124  	if conf.ColDir != "" {
   125  		collectors.InitPrograms(conf.ColDir)
   126  	}
   127  	if conf.SNMPTimeout > 0 {
   128  		snmp.Timeout = conf.SNMPTimeout
   129  	}
   130  	if conf.UseSWbemServicesClient {
   131  		conf.InitializeSWbemServices()
   132  	}
   133  	var err error
   134  	check := func(e error) {
   135  		if e != nil {
   136  			err = e
   137  		}
   138  	}
   139  	collectors.Init(conf)
   140  	for _, r := range conf.MetricFilters {
   141  		slog.Infof("Adding MetricFilter: %v\n", r)
   142  		check(collectors.AddMetricFilters(r))
   143  	}
   144  	for _, rmq := range conf.RabbitMQ {
   145  		check(collectors.RabbitMQ(rmq.URL))
   146  	}
   147  	for _, cfg := range conf.SNMP {
   148  		check(collectors.SNMP(cfg, conf.MIBS))
   149  	}
   150  	for _, i := range conf.ICMP {
   151  		check(collectors.ICMP(i.Host))
   152  	}
   153  	for _, a := range conf.AWS {
   154  		check(collectors.AWS(a.AccessKey, a.SecretKey, a.Region, a.BillingProductCodesRegex, a.BillingBucketName, a.BillingBucketPath, a.BillingPurgeDays))
   155  	}
   156  	for _, v := range conf.Vsphere {
   157  		check(collectors.Vsphere(v.User, v.Password, v.Host))
   158  	}
   159  	for _, p := range conf.Process {
   160  		check(collectors.AddProcessConfig(p))
   161  	}
   162  	for _, p := range conf.ProcessDotNet {
   163  		check(collectors.AddProcessDotNetConfig(p))
   164  	}
   165  	for _, h := range conf.HTTPUnit {
   166  		var freq time.Duration
   167  		var parseerr error
   168  		if h.Freq == "" {
   169  			freq = time.Minute * 5
   170  		} else {
   171  			freq, parseerr = time.ParseDuration(h.Freq)
   172  			if parseerr != nil {
   173  				slog.Fatal(parseerr)
   174  			}
   175  			if freq < time.Second {
   176  				slog.Fatalf("Invalid HTTPUnit frequency %s, cannot be less than 1 second.", h.Freq)
   177  			}
   178  		}
   179  		if h.TOML != "" {
   180  			check(collectors.HTTPUnitTOML(h.TOML, freq))
   181  		}
   182  		if h.Hiera != "" {
   183  			check(collectors.HTTPUnitHiera(h.Hiera, freq))
   184  		}
   185  	}
   186  	for _, r := range conf.Riak {
   187  		check(collectors.Riak(r.URL))
   188  	}
   189  
   190  	for _, x := range conf.ExtraHop {
   191  		check(collectors.ExtraHop(x.Host, x.APIKey, x.FilterBy, x.FilterPercent, x.AdditionalMetrics, x.CertificateSubjectMatch, x.CertificateActivityGroup))
   192  	}
   193  
   194  	if err != nil {
   195  		slog.Fatal(err)
   196  	}
   197  	collectors.KeepalivedCommunity = conf.KeepalivedCommunity
   198  	// Add all process collectors. This is platform specific.
   199  	collectors.WatchProcesses()
   200  	collectors.WatchProcessesDotNet()
   201  
   202  	if *flagFake > 0 {
   203  		collectors.InitFake(*flagFake)
   204  	}
   205  	collect.Debug = *flagDebug
   206  	util.Debug = *flagDebug
   207  	collect.DisableDefaultCollectors = conf.DisableSelf
   208  	c := collectors.Search(conf.Filter)
   209  	if len(c) == 0 {
   210  		slog.Fatalf("Filter %v matches no collectors.", conf.Filter)
   211  	}
   212  	for _, col := range c {
   213  		col.Init()
   214  	}
   215  	err = collectors.AddTagOverrides(c, conf.TagOverride)
   216  	if err != nil {
   217  		slog.Fatalf("Error adding tag overrides: %s", err)
   218  	}
   219  	u, err := parseHost(conf.Host)
   220  	if *flagList {
   221  		list(c)
   222  		return
   223  	} else if *flagPrint {
   224  		u = &url.URL{Scheme: "http", Host: "localhost:0"}
   225  	} else if err != nil {
   226  		slog.Fatalf("invalid host %v: %v", conf.Host, err)
   227  	}
   228  	freq := time.Second * time.Duration(conf.Freq)
   229  	if freq <= 0 {
   230  		slog.Fatal("freq must be > 0")
   231  	}
   232  	collectors.DefaultFreq = freq
   233  	collect.Freq = freq
   234  	if conf.BatchSize < 0 {
   235  		slog.Fatal("BatchSize must be > 0")
   236  	}
   237  	if conf.BatchSize != 0 {
   238  		collect.BatchSize = conf.BatchSize
   239  	}
   240  	collect.Tags = conf.Tags.Copy().Merge(opentsdb.TagSet{"os": runtime.GOOS})
   241  	if *flagPrint {
   242  		collect.Print = true
   243  	}
   244  	if !*flagDisableMetadata {
   245  		if err := metadata.Init(u, *flagDebug); err != nil {
   246  			slog.Fatal(err)
   247  		}
   248  	}
   249  	cdp, cquit := collectors.Run(c)
   250  	if u != nil {
   251  		slog.Infoln("OpenTSDB host:", hideUrlCredentials(u))
   252  	}
   253  	collect.UseNtlm = conf.UseNtlm
   254  	if err := collect.InitChan(u, "scollector", cdp); err != nil {
   255  		slog.Fatal(err)
   256  	}
   257  	if collect.DisableDefaultCollectors == false && version.VersionDate != "" {
   258  		v, err := strconv.ParseInt(version.VersionDate, 10, 64)
   259  		if err == nil {
   260  			go func() {
   261  				metadata.AddMetricMeta("scollector.version", metadata.Gauge, metadata.None,
   262  					"Scollector version number, which indicates when scollector was built.")
   263  				for {
   264  					if err := collect.Put("version", collect.Tags, v); err != nil {
   265  						slog.Error(err)
   266  					}
   267  					time.Sleep(time.Hour)
   268  				}
   269  			}()
   270  		}
   271  	}
   272  	if *flagBatchSize > 0 {
   273  		collect.BatchSize = *flagBatchSize
   274  	}
   275  
   276  	if conf.MaxQueueLen != 0 {
   277  		if conf.MaxQueueLen < collect.BatchSize {
   278  			slog.Fatalf("MaxQueueLen must be >= %d (BatchSize)", collect.BatchSize)
   279  		}
   280  		collect.MaxQueueLen = conf.MaxQueueLen
   281  	}
   282  	maxMemMB := uint64(500)
   283  	if conf.MaxMem != 0 {
   284  		maxMemMB = conf.MaxMem
   285  	}
   286  	go func() {
   287  		var m runtime.MemStats
   288  		for range time.Tick(time.Second * 30) {
   289  			runtime.ReadMemStats(&m)
   290  			allocMB := m.Alloc / 1024 / 1024
   291  			if allocMB > maxMemMB {
   292  				slog.Fatalf("memory max runtime reached: (current alloc: %v megabytes, max: %v megabytes)", allocMB, maxMemMB)
   293  			}
   294  			//See proccess_windows.go and process_linux.go for total process memory usage.
   295  			//Note that in linux the rss metric includes shared pages, where as in
   296  			//Windows the private working set does not include shared memory.
   297  			//Total memory used seems to scale linerarly with m.Alloc.
   298  			//But we want this to catch a memory leak outside the runtime (WMI/CGO).
   299  			//So for now just add any runtime allocations to the allowed total limit.
   300  			maxMemTotalMB := maxMemMB + allocMB
   301  			if collectors.TotalScollectorMemoryMB > maxMemTotalMB {
   302  				slog.Fatalf("memory max total reached: (current total: %v megabytes, current runtime alloc: %v megabytes, max: %v megabytes)", collectors.TotalScollectorMemoryMB, allocMB, maxMemTotalMB)
   303  			}
   304  		}
   305  	}()
   306  	sChan := make(chan os.Signal)
   307  	signal.Notify(sChan, os.Interrupt)
   308  	<-sChan
   309  	close(cquit)
   310  	// try to flush all datapoints on sigterm, but quit after 5 seconds no matter what.
   311  	time.AfterFunc(5*time.Second, func() {
   312  		os.Exit(0)
   313  	})
   314  	collect.Flush()
   315  }
   316  
   317  func readConf() *conf.Conf {
   318  	conf := &conf.Conf{
   319  		Freq: 15,
   320  	}
   321  	loc := *flagConf
   322  	if *flagConf == "" {
   323  		p, err := exePath()
   324  		if err != nil {
   325  			slog.Error(err)
   326  			return conf
   327  		}
   328  		dir := filepath.Dir(p)
   329  		loc = filepath.Join(dir, "scollector.toml")
   330  	}
   331  	f, err := os.Open(loc)
   332  	if err != nil {
   333  		if *flagConf != "" {
   334  			slog.Fatal(err)
   335  		}
   336  		if *flagDebug {
   337  			slog.Error(err)
   338  		}
   339  	} else {
   340  		defer f.Close()
   341  		md, err := toml.DecodeReader(f, conf)
   342  		if err != nil {
   343  			slog.Fatal(err)
   344  		}
   345  		if u := md.Undecoded(); len(u) > 0 {
   346  			slog.Fatalf("extra keys in %s: %v", loc, u)
   347  		}
   348  	}
   349  	return conf
   350  }
   351  
   352  func exePath() (string, error) {
   353  	prog := os.Args[0]
   354  	p, err := filepath.Abs(prog)
   355  	if err != nil {
   356  		return "", err
   357  	}
   358  	fi, err := os.Stat(p)
   359  	if err == nil {
   360  		if !fi.Mode().IsDir() {
   361  			return p, nil
   362  		}
   363  		err = fmt.Errorf("%s is directory", p)
   364  	}
   365  	if filepath.Ext(p) == "" {
   366  		p += ".exe"
   367  		fi, err := os.Stat(p)
   368  		if err == nil {
   369  			if !fi.Mode().IsDir() {
   370  				return p, nil
   371  			}
   372  			err = fmt.Errorf("%s is directory", p)
   373  		}
   374  	}
   375  	return "", err
   376  }
   377  
   378  func list(cs []collectors.Collector) {
   379  	for _, c := range cs {
   380  		fmt.Println(c.Name())
   381  	}
   382  }
   383  
   384  func parseHost(host string) (*url.URL, error) {
   385  	if !strings.Contains(host, "//") {
   386  		host = "http://" + host
   387  	}
   388  	u, err := url.Parse(host)
   389  	if err != nil {
   390  		return nil, err
   391  	}
   392  	if u.Host == "" {
   393  		return nil, fmt.Errorf("no host specified")
   394  	}
   395  	return u, nil
   396  }
   397  
   398  func hideUrlCredentials(u *url.URL) *url.URL {
   399  	// Copy original url, replace credentials, e. g. for logging
   400  	if u.User != nil {
   401  		u2 := new(url.URL)
   402  		*u2 = *u
   403  		u2.User = url.UserPassword("xxx", "xxx")
   404  		return u2
   405  	}
   406  	return u
   407  }
   408  
   409  func printPut(c chan *opentsdb.DataPoint) {
   410  	for dp := range c {
   411  		b, _ := json.Marshal(dp)
   412  		slog.Info(string(b))
   413  	}
   414  }
   415  
   416  func toToml(fname string) {
   417  	var c conf.Conf
   418  	b, err := ioutil.ReadFile(*flagConf)
   419  	if err != nil {
   420  		slog.Fatal(err)
   421  	}
   422  	extra := new(bytes.Buffer)
   423  	var hap conf.HAProxy
   424  	for i, line := range strings.Split(string(b), "\n") {
   425  		if strings.TrimSpace(line) == "" {
   426  			continue
   427  		}
   428  		sp := strings.SplitN(line, "=", 2)
   429  		if len(sp) != 2 {
   430  			slog.Fatalf("expected = in %v:%v", *flagConf, i+1)
   431  		}
   432  		k := strings.TrimSpace(sp[0])
   433  		v := strings.TrimSpace(sp[1])
   434  		switch k {
   435  		case "host":
   436  			c.Host = v
   437  		case "hostname":
   438  			c.Hostname = v
   439  		case "filter":
   440  			c.Filter = strings.Split(v, ",")
   441  		case "coldir":
   442  			c.ColDir = v
   443  		case "snmp":
   444  			for _, s := range strings.Split(v, ",") {
   445  				sp := strings.Split(s, "@")
   446  				if len(sp) != 2 {
   447  					slog.Fatal("invalid snmp string:", v)
   448  				}
   449  				c.SNMP = append(c.SNMP, conf.SNMP{
   450  					Community: sp[0],
   451  					Host:      sp[1],
   452  				})
   453  			}
   454  		case "icmp":
   455  			for _, i := range strings.Split(v, ",") {
   456  				c.ICMP = append(c.ICMP, conf.ICMP{Host: i})
   457  			}
   458  		case "haproxy":
   459  			if v != "" {
   460  				for _, s := range strings.Split(v, ",") {
   461  					sp := strings.SplitN(s, ":", 2)
   462  					if len(sp) != 2 {
   463  						slog.Fatal("invalid haproxy string:", v)
   464  					}
   465  					if hap.User != "" || hap.Password != "" {
   466  						slog.Fatal("only one haproxy line allowed")
   467  					}
   468  					hap.User = sp[0]
   469  					hap.Password = sp[1]
   470  				}
   471  			}
   472  		case "haproxy_instance":
   473  			sp := strings.SplitN(v, ":", 2)
   474  			if len(sp) != 2 {
   475  				slog.Fatal("invalid haproxy_instance string:", v)
   476  			}
   477  			hap.Instances = append(hap.Instances, conf.HAProxyInstance{
   478  				Tier: sp[0],
   479  				URL:  sp[1],
   480  			})
   481  		case "tags":
   482  			tags, err := opentsdb.ParseTags(v)
   483  			if err != nil {
   484  				slog.Fatal(err)
   485  			}
   486  			c.Tags = tags
   487  		case "aws":
   488  			for _, s := range strings.Split(v, ",") {
   489  				sp := strings.SplitN(s, ":", 2)
   490  				if len(sp) != 2 {
   491  					slog.Fatal("invalid AWS string:", v)
   492  				}
   493  				accessKey := sp[0]
   494  				idx := strings.LastIndex(sp[1], "@")
   495  				if idx == -1 {
   496  					slog.Fatal("invalid AWS string:", v)
   497  				}
   498  				secretKey := sp[1][:idx]
   499  				region := sp[1][idx+1:]
   500  				if len(accessKey) == 0 || len(secretKey) == 0 || len(region) == 0 {
   501  					slog.Fatal("invalid AWS string:", v)
   502  				}
   503  				c.AWS = append(c.AWS, conf.AWS{
   504  					AccessKey: accessKey,
   505  					SecretKey: secretKey,
   506  					Region:    region,
   507  				})
   508  			}
   509  		case "vsphere":
   510  			for _, s := range strings.Split(v, ",") {
   511  				sp := strings.SplitN(s, ":", 2)
   512  				if len(sp) != 2 {
   513  					slog.Fatal("invalid vsphere string:", v)
   514  				}
   515  				user := sp[0]
   516  				idx := strings.LastIndex(sp[1], "@")
   517  				if idx == -1 {
   518  					slog.Fatal("invalid vsphere string:", v)
   519  				}
   520  				pwd := sp[1][:idx]
   521  				host := sp[1][idx+1:]
   522  				if len(user) == 0 || len(pwd) == 0 || len(host) == 0 {
   523  					slog.Fatal("invalid vsphere string:", v)
   524  				}
   525  				c.Vsphere = append(c.Vsphere, conf.Vsphere{
   526  					User:     user,
   527  					Password: pwd,
   528  					Host:     host,
   529  				})
   530  			}
   531  		case "freq":
   532  			freq, err := strconv.Atoi(v)
   533  			if err != nil {
   534  				slog.Fatal(err)
   535  			}
   536  			c.Freq = freq
   537  		case "process":
   538  			if runtime.GOOS == "linux" {
   539  				var p struct {
   540  					Command string
   541  					Name    string
   542  					Args    string
   543  				}
   544  				sp := strings.Split(v, ",")
   545  				if len(sp) > 1 {
   546  					p.Name = sp[1]
   547  				}
   548  				if len(sp) > 2 {
   549  					p.Args = sp[2]
   550  				}
   551  				p.Command = sp[0]
   552  				extra.WriteString(fmt.Sprintf(`
   553  [[Process]]
   554    Command = %q
   555    Name = %q
   556    Args = %q
   557  `, p.Command, p.Name, p.Args))
   558  			} else if runtime.GOOS == "windows" {
   559  
   560  				extra.WriteString(fmt.Sprintf(`
   561  [[Process]]
   562    Name = %q
   563  `, v))
   564  			}
   565  		case "process_dotnet":
   566  			c.ProcessDotNet = append(c.ProcessDotNet, conf.ProcessDotNet{Name: v})
   567  		case "keepalived_community":
   568  			c.KeepalivedCommunity = v
   569  		default:
   570  			slog.Fatalf("unknown key in %v:%v", *flagConf, i+1)
   571  		}
   572  	}
   573  	if len(hap.Instances) > 0 {
   574  		c.HAProxy = append(c.HAProxy, hap)
   575  	}
   576  
   577  	f, err := os.Create(fname)
   578  	if err != nil {
   579  		slog.Fatal(err)
   580  	}
   581  	if err := toml.NewEncoder(f).Encode(&c); err != nil {
   582  		slog.Fatal(err)
   583  	}
   584  	if _, err := extra.WriteTo(f); err != nil {
   585  		slog.Fatal(err)
   586  	}
   587  	f.Close()
   588  }