bosun.org@v0.0.0-20210513094433-e25bc3e69a1f/cmd/scollector/collectors/puppet.go (about) 1 // +build windows linux 2 3 package collectors 4 5 import ( 6 "encoding/json" 7 "fmt" 8 "io/ioutil" 9 "os" 10 "strconv" 11 "time" 12 13 "bosun.org/metadata" 14 "bosun.org/opentsdb" 15 "gopkg.in/yaml.v1" 16 ) 17 18 func init() { 19 collectors = append(collectors, &IntervalCollector{F: puppet, Enable: puppetEnable}) 20 } 21 22 func puppetEnable() bool { 23 _, err := os.Stat(puppetPath) 24 return err == nil 25 } 26 27 type PRSummary struct { 28 Changes struct { 29 Total float64 `yaml:"total"` 30 } `yaml:"changes"` 31 Events struct { 32 Failure float64 `yaml:"failure"` 33 Success float64 `yaml:"success"` 34 Total float64 `yaml:"total"` 35 } `yaml:"events"` 36 Resources struct { 37 Changed float64 `yaml:"changed"` 38 Failed float64 `yaml:"failed"` 39 FailedToRestart float64 `yaml:"failed_to_restart"` 40 OutOfSync float64 `yaml:"out_of_sync"` 41 Restarted float64 `yaml:"restarted"` 42 Scheduled float64 `yaml:"scheduled"` 43 Skipped float64 `yaml:"skipped"` 44 Total float64 `yaml:"total"` 45 } `yaml:"resources"` 46 Time map[string]string `yaml:"time"` 47 Version struct { 48 Config string `yaml:"config"` 49 Puppet string `yaml:"puppet"` 50 } `yaml:"version"` 51 } 52 53 type PRReport struct { 54 Status string `yaml:"status"` 55 Time string `yaml:"time"` // 2006-01-02 15:04:05.999999 -07:00 56 } 57 58 func puppet() (opentsdb.MultiDataPoint, error) { 59 var md opentsdb.MultiDataPoint 60 // See if puppet has been disabled (i.e. `puppet agent --disable 'Reason'`) 61 var disabled, noReason int 62 if v, err := ioutil.ReadFile(puppetDisabled); err == nil { 63 disabled = 1 64 d := struct { 65 Disabled string `json:"disabled_message"` 66 }{} 67 if err := json.Unmarshal(v, &d); err == nil && d.Disabled != "" { 68 if d.Disabled == "reason not specified" { 69 noReason = 1 70 } 71 metadata.AddMeta("", nil, "puppet.disabled_reason", d.Disabled, true) 72 } 73 } 74 Add(&md, "puppet.disabled", disabled, nil, metadata.Gauge, metadata.Count, "") 75 Add(&md, "puppet.disabled_no_reason", noReason, nil, metadata.Gauge, metadata.Count, "") 76 // Gather stats from the run summary 77 s, err := ioutil.ReadFile(puppetRunSummary) 78 if err != nil { 79 return nil, err 80 } 81 var m PRSummary 82 if err = yaml.Unmarshal(s, &m); err != nil { 83 return nil, err 84 } 85 last_run, err := strconv.ParseInt(m.Time["last_run"], 10, 64) 86 seconds_since_run := time.Now().Unix() - last_run 87 //m.Version.Config appears to be the unix timestamp 88 AddTS(&md, "puppet.run.resources", last_run, m.Resources.Changed, opentsdb.TagSet{"resource": "changed"}, metadata.Gauge, metadata.Count, descPuppetChanged) 89 AddTS(&md, "puppet.run.resources", last_run, m.Resources.Failed, opentsdb.TagSet{"resource": "failed"}, metadata.Gauge, metadata.Count, descPuppetFailed) 90 AddTS(&md, "puppet.run.resources", last_run, m.Resources.FailedToRestart, opentsdb.TagSet{"resource": "failed_to_restart"}, metadata.Gauge, metadata.Count, descPuppetFailedToRestart) 91 AddTS(&md, "puppet.run.resources", last_run, m.Resources.OutOfSync, opentsdb.TagSet{"resource": "out_of_sync"}, metadata.Gauge, metadata.Count, descPuppetOutOfSync) 92 AddTS(&md, "puppet.run.resources", last_run, m.Resources.Restarted, opentsdb.TagSet{"resource": "restarted"}, metadata.Gauge, metadata.Count, descPuppetRestarted) 93 AddTS(&md, "puppet.run.resources", last_run, m.Resources.Scheduled, opentsdb.TagSet{"resource": "scheduled"}, metadata.Gauge, metadata.Count, descPuppetScheduled) 94 AddTS(&md, "puppet.run.resources", last_run, m.Resources.Skipped, opentsdb.TagSet{"resource": "skipped"}, metadata.Gauge, metadata.Count, descPuppetSkipped) 95 AddTS(&md, "puppet.run.resources_total", last_run, m.Resources.Total, nil, metadata.Gauge, metadata.Count, descPuppetTotalResources) 96 AddTS(&md, "puppet.run.changes", last_run, m.Changes.Total, nil, metadata.Gauge, metadata.Count, descPuppetTotalChanges) 97 Add(&md, "puppet.last_run", seconds_since_run, nil, metadata.Gauge, metadata.Second, descPuppetLastRun) 98 for k, v := range m.Time { 99 metric, err := strconv.ParseFloat(v, 64) 100 if err != nil { 101 return md, fmt.Errorf("Error parsing time: %s", err) 102 } 103 if k == "total" { 104 AddTS(&md, "puppet.run_duration_total", last_run, metric, nil, metadata.Gauge, metadata.Second, descPuppetTotalTime) 105 } else if k != "last_run" { 106 AddTS(&md, "puppet.run_duration", last_run, metric, opentsdb.TagSet{"time": k}, metadata.Gauge, metadata.Second, descPuppetModuleTime) 107 } 108 } 109 110 // Not all hosts will use puppet run reports 111 if _, err := os.Stat(puppetRunReport); err == nil { 112 f, err := ioutil.ReadFile(puppetRunReport) 113 if err != nil { 114 return md, err 115 } 116 117 var report PRReport 118 if err = yaml.Unmarshal(f, &report); err != nil { 119 return md, err 120 } 121 122 t, err := time.Parse("2006-01-02 15:04:05.999999 -07:00", report.Time) 123 if err != nil { 124 // Puppet 5 changed the time format 125 t, err = time.Parse("2006-01-02T15:04:05.999999-07:00", report.Time) 126 } 127 if err != nil { 128 return md, fmt.Errorf("Error parsing report time: %s", err) 129 } 130 // As listed at https://docs.puppetlabs.com/puppet/latest/reference/format_report.html 131 var statusCode = map[string]int{ 132 "changed": 0, 133 "unchanged": 1, 134 "failed": 2, 135 } 136 if status, ok := statusCode[report.Status]; ok { 137 AddTS(&md, "puppet.run.status", t.Unix(), status, nil, metadata.Gauge, metadata.StatusCode, descPuppetRunStatus) 138 } else { 139 return md, fmt.Errorf("Unknown status in %s: %s", puppetRunReport, report.Status) 140 } 141 } 142 return md, nil 143 } 144 145 const ( 146 descPuppetChanged = "Number of resources for which changes were applied." 147 descPuppetFailed = "Number of resources which caused an error during evaluation." 148 descPuppetFailedToRestart = "Number of service resources which failed to restart." 149 descPuppetOutOfSync = "Number of resources which should have been changed if catalog was applied." 150 descPuppetRestarted = "Number of service resources which were restarted." 151 descPuppetScheduled = "Number of service resources which were scheduled for restart." 152 descPuppetSkipped = "Number of resources which puppet opted to not apply changes to." 153 descPuppetTotalResources = "Total number of resources." 154 descPuppetTotalChanges = "Total number of changes." 155 descPuppetTotalTime = "Total time which puppet took to run." 156 descPuppetModuleTime = "Time which this tagged module took to run." 157 descPuppetLastRun = "Number of seconds since puppet run last ran." 158 descPuppetRunStatus = "0: changed, 1: unchanged, 2: failed" 159 )