bosun.org@v0.0.0-20210513094433-e25bc3e69a1f/cmd/scollector/collectors/systemd_linux.go (about) 1 package collectors 2 3 import ( 4 "fmt" 5 "os" 6 "os/exec" 7 "regexp" 8 "strconv" 9 "strings" 10 11 "bosun.org/cmd/scollector/conf" 12 "bosun.org/metadata" 13 "bosun.org/opentsdb" 14 "bosun.org/util" 15 "github.com/coreos/go-systemd/dbus" 16 ) 17 18 type systemdServiceConfig struct { 19 regex *regexp.Regexp 20 watch bool 21 } 22 23 var systemdServices []*systemdServiceConfig 24 25 func init() { 26 registerInit(func(c *conf.Conf) { 27 if _, err := exec.LookPath("systemctl"); err == nil { 28 for _, s := range c.SystemdService { 29 AddSystemdServiceConfig(s) 30 } 31 collectors = append(collectors, &IntervalCollector{ 32 F: func() (opentsdb.MultiDataPoint, error) { 33 return c_systemd() 34 }, 35 name: "c_systemd", 36 }) 37 } 38 }) 39 } 40 41 func AddSystemdServiceConfig(params conf.ServiceParams) error { 42 if params.Name == "" { 43 return fmt.Errorf("empty service Name") 44 } 45 reg, err := regexp.Compile(params.Name) 46 if err != nil { 47 return err 48 } 49 serviceConfig := systemdServiceConfig{regex: reg, watch: params.WatchProc} 50 systemdServices = append(systemdServices, &serviceConfig) 51 return nil 52 } 53 54 // c_systemd() iterates through all units provided by systemd's dbus info. 55 // If the unit is a service we care about, it sends service metrics. 56 // Also sends process data if WatchProc was set to true in the [[SystemdService]] config. 57 func c_systemd() (opentsdb.MultiDataPoint, error) { 58 conn, err := dbus.New() 59 if err != nil { 60 return nil, err 61 } 62 defer conn.Close() 63 64 units, err := conn.ListUnits() 65 if err != nil { 66 return nil, err 67 } 68 69 var md opentsdb.MultiDataPoint 70 for _, unit := range units { 71 if strings.HasSuffix(unit.Name, ".service") { 72 shortName := strings.TrimSuffix(unit.Name, ".service") 73 for _, config := range systemdServices { 74 if config.regex.MatchString(shortName) { 75 if config.watch { 76 err = watchSystemdServiceProc(&md, conn, unit) 77 if err != nil { 78 return nil, err 79 } 80 } 81 if _, ok := activeState[unit.ActiveState]; !ok { 82 return nil, fmt.Errorf("error: unit %s has unknown ActiveState: %s\n", unit.Name, unit.ActiveState) 83 } 84 systemdTags := opentsdb.TagSet{"name": unit.Name} 85 osTags := opentsdb.TagSet{"name": shortName} 86 Add(&md, "linux.systemd.unit.activestate", activeState[unit.ActiveState], systemdTags, metadata.Gauge, metadata.StatusCode, descActiveState) 87 Add(&md, osServiceRunning, util.Btoi(unit.ActiveState == "active"), osTags, metadata.Gauge, metadata.Bool, osServiceRunningDesc) 88 } 89 } 90 } 91 } 92 93 return md, err 94 } 95 96 // watchSystemdService() attempts to determine the main PID of a service and 97 // builds a WatchedProc{} which is then sent to linuxProcMonitor() 98 func watchSystemdServiceProc(md *opentsdb.MultiDataPoint, conn *dbus.Conn, unit dbus.UnitStatus) error { 99 // ExecMainPID can be non-running. MainPID is the pid of the running service. 100 mainPIDProp, err := conn.GetUnitTypeProperty(unit.Name, "Service", "MainPID") 101 if err != nil { 102 return err 103 } 104 105 mainPID, ok := mainPIDProp.Value.Value().(uint32) 106 if !ok { 107 return fmt.Errorf("Received unexpected PID type for service %s.", unit.Name) 108 } 109 // MainPID is 0 if there is no running service. 110 if mainPID == 0 { 111 return nil 112 } 113 pidStr := strconv.Itoa(int(mainPID)) 114 115 cmdline, err := getLinuxCmdline(pidStr) 116 if err != nil { 117 return err 118 } 119 if cmdline == nil { 120 return nil 121 } 122 123 pidFile, err := os.Stat("/proc/" + pidStr) 124 if err != nil { 125 return err 126 } 127 128 proc := Process{ 129 Pid: pidStr, 130 Command: cmdline[0], 131 Started: pidFile.ModTime(), 132 } 133 134 wp := WatchedProc{ 135 Command: regexp.MustCompile("^" + regexp.QuoteMeta(cmdline[0]) + "$"), 136 Name: strings.TrimSuffix(unit.Name, ".service"), 137 Processes: make(map[Process]int), 138 ArgMatch: regexp.MustCompile(""), 139 idPool: new(idPool)} 140 141 // Since we only have one PID per service (at the moment), this is always set to 1 142 wp.Processes[proc] = wp.get() 143 144 if e := linuxProcMonitor(&wp, md); e != nil { 145 return e 146 } 147 148 return err 149 } 150 151 var activeState = map[string]int{ 152 "active": 0, 153 "reloading": 1, 154 "inactive": 2, 155 "failed": 3, 156 "activating": 4, 157 "deactivating": 5, 158 } 159 160 const ( 161 descActiveState = "0: active, 1: reloading, 2: inactive, 3: failed, 4: activating, 5: deactivating" 162 )