bosun.org@v0.0.0-20210513094433-e25bc3e69a1f/cmd/scollector/main.go (about) 1 package main 2 3 import ( 4 "bytes" 5 "encoding/json" 6 _ "expvar" 7 "flag" 8 "fmt" 9 "io/ioutil" 10 "log" 11 "net/http" 12 _ "net/http/pprof" 13 "net/url" 14 "os" 15 "os/signal" 16 "path/filepath" 17 "runtime" 18 "strconv" 19 "strings" 20 "time" 21 22 version "bosun.org/_version" 23 "bosun.org/cmd/scollector/collectors" 24 "bosun.org/cmd/scollector/conf" 25 "bosun.org/collect" 26 "bosun.org/metadata" 27 "bosun.org/opentsdb" 28 "bosun.org/slog" 29 "bosun.org/snmp" 30 "bosun.org/util" 31 "github.com/BurntSushi/toml" 32 "github.com/facebookgo/httpcontrol" 33 ) 34 35 var ( 36 flagHost = flag.String("h", "", "OpenTSDB or Bosun host to send data. Overrides Host in conf file.") 37 flagFilter = flag.String("f", "", "Filters collectors matching these terms, separated by comma. Overrides Filter in conf file.") 38 flagList = flag.Bool("l", false, "List available collectors.") 39 flagPrint = flag.Bool("p", false, "Print to screen instead of sending to a host") 40 flagBatchSize = flag.Int("b", 0, "OpenTSDB batch size. Default is 500.") 41 flagFake = flag.Int("fake", 0, "Generates X fake data points on the test.fake metric per second.") 42 flagDebug = flag.Bool("d", false, "Enables debug output.") 43 flagDisableMetadata = flag.Bool("m", false, "Disable sending of metadata.") 44 flagVersion = flag.Bool("version", false, "Prints the version and exits.") 45 flagConf = flag.String("conf", "", "Location of configuration file. Defaults to scollector.toml in directory of the scollector executable.") 46 flagToToml = flag.String("totoml", "", "Location of destination toml file to convert. Reads from value of -conf.") 47 flagNtlm = flag.Bool("useNtlm", false, "Specifies to use NTLM authentication.") 48 49 mains []func() 50 ) 51 52 type scollectorHTTPTransport struct { 53 UserAgent string 54 http.RoundTripper 55 } 56 57 func (t *scollectorHTTPTransport) RoundTrip(req *http.Request) (*http.Response, error) { 58 if req.Header.Get("User-Agent") == "" { 59 req.Header.Add("User-Agent", t.UserAgent) 60 } 61 return t.RoundTripper.RoundTrip(req) 62 } 63 64 func main() { 65 flag.Parse() 66 if *flagToToml != "" { 67 toToml(*flagToToml) 68 fmt.Println("toml conversion complete; remove all empty values by hand (empty strings, 0)") 69 return 70 } 71 if *flagPrint || *flagDebug { 72 slog.Set(&slog.StdLog{Log: log.New(os.Stdout, "", log.LstdFlags)}) 73 } 74 if *flagVersion { 75 fmt.Println(version.GetVersionInfo("scollector")) 76 os.Exit(0) 77 } 78 for _, m := range mains { 79 m() 80 } 81 conf := readConf() 82 ua := "Scollector/" + version.ShortVersion() 83 if conf.UserAgentMessage != "" { 84 ua += fmt.Sprintf(" (%s)", conf.UserAgentMessage) 85 } 86 if conf.AuthToken != "" { 87 collect.AuthToken = conf.AuthToken 88 metadata.AuthToken = conf.AuthToken 89 } 90 client := &http.Client{ 91 Transport: &scollectorHTTPTransport{ 92 ua, 93 &httpcontrol.Transport{ 94 RequestTimeout: time.Minute, 95 }, 96 }, 97 } 98 http.DefaultClient = client 99 collect.DefaultClient = client 100 if *flagHost != "" { 101 conf.Host = *flagHost 102 } 103 if *flagNtlm { 104 conf.UseNtlm = *flagNtlm 105 } 106 if *flagFilter != "" { 107 conf.Filter = strings.Split(*flagFilter, ",") 108 } 109 if !conf.Tags.Valid() { 110 slog.Fatalf("invalid tags: %v", conf.Tags) 111 } else if conf.Tags["host"] != "" { 112 slog.Fatalf("host not supported in custom tags, use Hostname instead") 113 } 114 if conf.PProf != "" { 115 go func() { 116 slog.Infof("Starting pprof at http://%s/debug/pprof/", conf.PProf) 117 slog.Fatal(http.ListenAndServe(conf.PProf, nil)) 118 }() 119 } 120 collectors.AddTags = conf.Tags 121 122 util.InitHostManager(conf.Hostname, conf.FullHost) 123 124 if conf.ColDir != "" { 125 collectors.InitPrograms(conf.ColDir) 126 } 127 if conf.SNMPTimeout > 0 { 128 snmp.Timeout = conf.SNMPTimeout 129 } 130 if conf.UseSWbemServicesClient { 131 conf.InitializeSWbemServices() 132 } 133 var err error 134 check := func(e error) { 135 if e != nil { 136 err = e 137 } 138 } 139 collectors.Init(conf) 140 for _, r := range conf.MetricFilters { 141 slog.Infof("Adding MetricFilter: %v\n", r) 142 check(collectors.AddMetricFilters(r)) 143 } 144 for _, rmq := range conf.RabbitMQ { 145 check(collectors.RabbitMQ(rmq.URL)) 146 } 147 for _, cfg := range conf.SNMP { 148 check(collectors.SNMP(cfg, conf.MIBS)) 149 } 150 for _, i := range conf.ICMP { 151 check(collectors.ICMP(i.Host)) 152 } 153 for _, a := range conf.AWS { 154 check(collectors.AWS(a.AccessKey, a.SecretKey, a.Region, a.BillingProductCodesRegex, a.BillingBucketName, a.BillingBucketPath, a.BillingPurgeDays)) 155 } 156 for _, v := range conf.Vsphere { 157 check(collectors.Vsphere(v.User, v.Password, v.Host)) 158 } 159 for _, p := range conf.Process { 160 check(collectors.AddProcessConfig(p)) 161 } 162 for _, p := range conf.ProcessDotNet { 163 check(collectors.AddProcessDotNetConfig(p)) 164 } 165 for _, h := range conf.HTTPUnit { 166 var freq time.Duration 167 var parseerr error 168 if h.Freq == "" { 169 freq = time.Minute * 5 170 } else { 171 freq, parseerr = time.ParseDuration(h.Freq) 172 if parseerr != nil { 173 slog.Fatal(parseerr) 174 } 175 if freq < time.Second { 176 slog.Fatalf("Invalid HTTPUnit frequency %s, cannot be less than 1 second.", h.Freq) 177 } 178 } 179 if h.TOML != "" { 180 check(collectors.HTTPUnitTOML(h.TOML, freq)) 181 } 182 if h.Hiera != "" { 183 check(collectors.HTTPUnitHiera(h.Hiera, freq)) 184 } 185 } 186 for _, r := range conf.Riak { 187 check(collectors.Riak(r.URL)) 188 } 189 190 for _, x := range conf.ExtraHop { 191 check(collectors.ExtraHop(x.Host, x.APIKey, x.FilterBy, x.FilterPercent, x.AdditionalMetrics, x.CertificateSubjectMatch, x.CertificateActivityGroup)) 192 } 193 194 if err != nil { 195 slog.Fatal(err) 196 } 197 collectors.KeepalivedCommunity = conf.KeepalivedCommunity 198 // Add all process collectors. This is platform specific. 199 collectors.WatchProcesses() 200 collectors.WatchProcessesDotNet() 201 202 if *flagFake > 0 { 203 collectors.InitFake(*flagFake) 204 } 205 collect.Debug = *flagDebug 206 util.Debug = *flagDebug 207 collect.DisableDefaultCollectors = conf.DisableSelf 208 c := collectors.Search(conf.Filter) 209 if len(c) == 0 { 210 slog.Fatalf("Filter %v matches no collectors.", conf.Filter) 211 } 212 for _, col := range c { 213 col.Init() 214 } 215 err = collectors.AddTagOverrides(c, conf.TagOverride) 216 if err != nil { 217 slog.Fatalf("Error adding tag overrides: %s", err) 218 } 219 u, err := parseHost(conf.Host) 220 if *flagList { 221 list(c) 222 return 223 } else if *flagPrint { 224 u = &url.URL{Scheme: "http", Host: "localhost:0"} 225 } else if err != nil { 226 slog.Fatalf("invalid host %v: %v", conf.Host, err) 227 } 228 freq := time.Second * time.Duration(conf.Freq) 229 if freq <= 0 { 230 slog.Fatal("freq must be > 0") 231 } 232 collectors.DefaultFreq = freq 233 collect.Freq = freq 234 if conf.BatchSize < 0 { 235 slog.Fatal("BatchSize must be > 0") 236 } 237 if conf.BatchSize != 0 { 238 collect.BatchSize = conf.BatchSize 239 } 240 collect.Tags = conf.Tags.Copy().Merge(opentsdb.TagSet{"os": runtime.GOOS}) 241 if *flagPrint { 242 collect.Print = true 243 } 244 if !*flagDisableMetadata { 245 if err := metadata.Init(u, *flagDebug); err != nil { 246 slog.Fatal(err) 247 } 248 } 249 cdp, cquit := collectors.Run(c) 250 if u != nil { 251 slog.Infoln("OpenTSDB host:", hideUrlCredentials(u)) 252 } 253 collect.UseNtlm = conf.UseNtlm 254 if err := collect.InitChan(u, "scollector", cdp); err != nil { 255 slog.Fatal(err) 256 } 257 if collect.DisableDefaultCollectors == false && version.VersionDate != "" { 258 v, err := strconv.ParseInt(version.VersionDate, 10, 64) 259 if err == nil { 260 go func() { 261 metadata.AddMetricMeta("scollector.version", metadata.Gauge, metadata.None, 262 "Scollector version number, which indicates when scollector was built.") 263 for { 264 if err := collect.Put("version", collect.Tags, v); err != nil { 265 slog.Error(err) 266 } 267 time.Sleep(time.Hour) 268 } 269 }() 270 } 271 } 272 if *flagBatchSize > 0 { 273 collect.BatchSize = *flagBatchSize 274 } 275 276 if conf.MaxQueueLen != 0 { 277 if conf.MaxQueueLen < collect.BatchSize { 278 slog.Fatalf("MaxQueueLen must be >= %d (BatchSize)", collect.BatchSize) 279 } 280 collect.MaxQueueLen = conf.MaxQueueLen 281 } 282 maxMemMB := uint64(500) 283 if conf.MaxMem != 0 { 284 maxMemMB = conf.MaxMem 285 } 286 go func() { 287 var m runtime.MemStats 288 for range time.Tick(time.Second * 30) { 289 runtime.ReadMemStats(&m) 290 allocMB := m.Alloc / 1024 / 1024 291 if allocMB > maxMemMB { 292 slog.Fatalf("memory max runtime reached: (current alloc: %v megabytes, max: %v megabytes)", allocMB, maxMemMB) 293 } 294 //See proccess_windows.go and process_linux.go for total process memory usage. 295 //Note that in linux the rss metric includes shared pages, where as in 296 //Windows the private working set does not include shared memory. 297 //Total memory used seems to scale linerarly with m.Alloc. 298 //But we want this to catch a memory leak outside the runtime (WMI/CGO). 299 //So for now just add any runtime allocations to the allowed total limit. 300 maxMemTotalMB := maxMemMB + allocMB 301 if collectors.TotalScollectorMemoryMB > maxMemTotalMB { 302 slog.Fatalf("memory max total reached: (current total: %v megabytes, current runtime alloc: %v megabytes, max: %v megabytes)", collectors.TotalScollectorMemoryMB, allocMB, maxMemTotalMB) 303 } 304 } 305 }() 306 sChan := make(chan os.Signal) 307 signal.Notify(sChan, os.Interrupt) 308 <-sChan 309 close(cquit) 310 // try to flush all datapoints on sigterm, but quit after 5 seconds no matter what. 311 time.AfterFunc(5*time.Second, func() { 312 os.Exit(0) 313 }) 314 collect.Flush() 315 } 316 317 func readConf() *conf.Conf { 318 conf := &conf.Conf{ 319 Freq: 15, 320 } 321 loc := *flagConf 322 if *flagConf == "" { 323 p, err := exePath() 324 if err != nil { 325 slog.Error(err) 326 return conf 327 } 328 dir := filepath.Dir(p) 329 loc = filepath.Join(dir, "scollector.toml") 330 } 331 f, err := os.Open(loc) 332 if err != nil { 333 if *flagConf != "" { 334 slog.Fatal(err) 335 } 336 if *flagDebug { 337 slog.Error(err) 338 } 339 } else { 340 defer f.Close() 341 md, err := toml.DecodeReader(f, conf) 342 if err != nil { 343 slog.Fatal(err) 344 } 345 if u := md.Undecoded(); len(u) > 0 { 346 slog.Fatalf("extra keys in %s: %v", loc, u) 347 } 348 } 349 return conf 350 } 351 352 func exePath() (string, error) { 353 prog := os.Args[0] 354 p, err := filepath.Abs(prog) 355 if err != nil { 356 return "", err 357 } 358 fi, err := os.Stat(p) 359 if err == nil { 360 if !fi.Mode().IsDir() { 361 return p, nil 362 } 363 err = fmt.Errorf("%s is directory", p) 364 } 365 if filepath.Ext(p) == "" { 366 p += ".exe" 367 fi, err := os.Stat(p) 368 if err == nil { 369 if !fi.Mode().IsDir() { 370 return p, nil 371 } 372 err = fmt.Errorf("%s is directory", p) 373 } 374 } 375 return "", err 376 } 377 378 func list(cs []collectors.Collector) { 379 for _, c := range cs { 380 fmt.Println(c.Name()) 381 } 382 } 383 384 func parseHost(host string) (*url.URL, error) { 385 if !strings.Contains(host, "//") { 386 host = "http://" + host 387 } 388 u, err := url.Parse(host) 389 if err != nil { 390 return nil, err 391 } 392 if u.Host == "" { 393 return nil, fmt.Errorf("no host specified") 394 } 395 return u, nil 396 } 397 398 func hideUrlCredentials(u *url.URL) *url.URL { 399 // Copy original url, replace credentials, e. g. for logging 400 if u.User != nil { 401 u2 := new(url.URL) 402 *u2 = *u 403 u2.User = url.UserPassword("xxx", "xxx") 404 return u2 405 } 406 return u 407 } 408 409 func printPut(c chan *opentsdb.DataPoint) { 410 for dp := range c { 411 b, _ := json.Marshal(dp) 412 slog.Info(string(b)) 413 } 414 } 415 416 func toToml(fname string) { 417 var c conf.Conf 418 b, err := ioutil.ReadFile(*flagConf) 419 if err != nil { 420 slog.Fatal(err) 421 } 422 extra := new(bytes.Buffer) 423 var hap conf.HAProxy 424 for i, line := range strings.Split(string(b), "\n") { 425 if strings.TrimSpace(line) == "" { 426 continue 427 } 428 sp := strings.SplitN(line, "=", 2) 429 if len(sp) != 2 { 430 slog.Fatalf("expected = in %v:%v", *flagConf, i+1) 431 } 432 k := strings.TrimSpace(sp[0]) 433 v := strings.TrimSpace(sp[1]) 434 switch k { 435 case "host": 436 c.Host = v 437 case "hostname": 438 c.Hostname = v 439 case "filter": 440 c.Filter = strings.Split(v, ",") 441 case "coldir": 442 c.ColDir = v 443 case "snmp": 444 for _, s := range strings.Split(v, ",") { 445 sp := strings.Split(s, "@") 446 if len(sp) != 2 { 447 slog.Fatal("invalid snmp string:", v) 448 } 449 c.SNMP = append(c.SNMP, conf.SNMP{ 450 Community: sp[0], 451 Host: sp[1], 452 }) 453 } 454 case "icmp": 455 for _, i := range strings.Split(v, ",") { 456 c.ICMP = append(c.ICMP, conf.ICMP{Host: i}) 457 } 458 case "haproxy": 459 if v != "" { 460 for _, s := range strings.Split(v, ",") { 461 sp := strings.SplitN(s, ":", 2) 462 if len(sp) != 2 { 463 slog.Fatal("invalid haproxy string:", v) 464 } 465 if hap.User != "" || hap.Password != "" { 466 slog.Fatal("only one haproxy line allowed") 467 } 468 hap.User = sp[0] 469 hap.Password = sp[1] 470 } 471 } 472 case "haproxy_instance": 473 sp := strings.SplitN(v, ":", 2) 474 if len(sp) != 2 { 475 slog.Fatal("invalid haproxy_instance string:", v) 476 } 477 hap.Instances = append(hap.Instances, conf.HAProxyInstance{ 478 Tier: sp[0], 479 URL: sp[1], 480 }) 481 case "tags": 482 tags, err := opentsdb.ParseTags(v) 483 if err != nil { 484 slog.Fatal(err) 485 } 486 c.Tags = tags 487 case "aws": 488 for _, s := range strings.Split(v, ",") { 489 sp := strings.SplitN(s, ":", 2) 490 if len(sp) != 2 { 491 slog.Fatal("invalid AWS string:", v) 492 } 493 accessKey := sp[0] 494 idx := strings.LastIndex(sp[1], "@") 495 if idx == -1 { 496 slog.Fatal("invalid AWS string:", v) 497 } 498 secretKey := sp[1][:idx] 499 region := sp[1][idx+1:] 500 if len(accessKey) == 0 || len(secretKey) == 0 || len(region) == 0 { 501 slog.Fatal("invalid AWS string:", v) 502 } 503 c.AWS = append(c.AWS, conf.AWS{ 504 AccessKey: accessKey, 505 SecretKey: secretKey, 506 Region: region, 507 }) 508 } 509 case "vsphere": 510 for _, s := range strings.Split(v, ",") { 511 sp := strings.SplitN(s, ":", 2) 512 if len(sp) != 2 { 513 slog.Fatal("invalid vsphere string:", v) 514 } 515 user := sp[0] 516 idx := strings.LastIndex(sp[1], "@") 517 if idx == -1 { 518 slog.Fatal("invalid vsphere string:", v) 519 } 520 pwd := sp[1][:idx] 521 host := sp[1][idx+1:] 522 if len(user) == 0 || len(pwd) == 0 || len(host) == 0 { 523 slog.Fatal("invalid vsphere string:", v) 524 } 525 c.Vsphere = append(c.Vsphere, conf.Vsphere{ 526 User: user, 527 Password: pwd, 528 Host: host, 529 }) 530 } 531 case "freq": 532 freq, err := strconv.Atoi(v) 533 if err != nil { 534 slog.Fatal(err) 535 } 536 c.Freq = freq 537 case "process": 538 if runtime.GOOS == "linux" { 539 var p struct { 540 Command string 541 Name string 542 Args string 543 } 544 sp := strings.Split(v, ",") 545 if len(sp) > 1 { 546 p.Name = sp[1] 547 } 548 if len(sp) > 2 { 549 p.Args = sp[2] 550 } 551 p.Command = sp[0] 552 extra.WriteString(fmt.Sprintf(` 553 [[Process]] 554 Command = %q 555 Name = %q 556 Args = %q 557 `, p.Command, p.Name, p.Args)) 558 } else if runtime.GOOS == "windows" { 559 560 extra.WriteString(fmt.Sprintf(` 561 [[Process]] 562 Name = %q 563 `, v)) 564 } 565 case "process_dotnet": 566 c.ProcessDotNet = append(c.ProcessDotNet, conf.ProcessDotNet{Name: v}) 567 case "keepalived_community": 568 c.KeepalivedCommunity = v 569 default: 570 slog.Fatalf("unknown key in %v:%v", *flagConf, i+1) 571 } 572 } 573 if len(hap.Instances) > 0 { 574 c.HAProxy = append(c.HAProxy, hap) 575 } 576 577 f, err := os.Create(fname) 578 if err != nil { 579 slog.Fatal(err) 580 } 581 if err := toml.NewEncoder(f).Encode(&c); err != nil { 582 slog.Fatal(err) 583 } 584 if _, err := extra.WriteTo(f); err != nil { 585 slog.Fatal(err) 586 } 587 f.Close() 588 }