bosun.org@v0.0.0-20210513094433-e25bc3e69a1f/cmd/scollector/collectors/systemd_linux.go (about)

     1  package collectors
     2  
     3  import (
     4  	"fmt"
     5  	"os"
     6  	"os/exec"
     7  	"regexp"
     8  	"strconv"
     9  	"strings"
    10  
    11  	"bosun.org/cmd/scollector/conf"
    12  	"bosun.org/metadata"
    13  	"bosun.org/opentsdb"
    14  	"bosun.org/util"
    15  	"github.com/coreos/go-systemd/dbus"
    16  )
    17  
    18  type systemdServiceConfig struct {
    19  	regex *regexp.Regexp
    20  	watch bool
    21  }
    22  
    23  var systemdServices []*systemdServiceConfig
    24  
    25  func init() {
    26  	registerInit(func(c *conf.Conf) {
    27  		if _, err := exec.LookPath("systemctl"); err == nil {
    28  			for _, s := range c.SystemdService {
    29  				AddSystemdServiceConfig(s)
    30  			}
    31  			collectors = append(collectors, &IntervalCollector{
    32  				F: func() (opentsdb.MultiDataPoint, error) {
    33  					return c_systemd()
    34  				},
    35  				name: "c_systemd",
    36  			})
    37  		}
    38  	})
    39  }
    40  
    41  func AddSystemdServiceConfig(params conf.ServiceParams) error {
    42  	if params.Name == "" {
    43  		return fmt.Errorf("empty service Name")
    44  	}
    45  	reg, err := regexp.Compile(params.Name)
    46  	if err != nil {
    47  		return err
    48  	}
    49  	serviceConfig := systemdServiceConfig{regex: reg, watch: params.WatchProc}
    50  	systemdServices = append(systemdServices, &serviceConfig)
    51  	return nil
    52  }
    53  
    54  // c_systemd() iterates through all units provided by systemd's dbus info.
    55  // If the unit is a service we care about, it sends service metrics.
    56  // Also sends process data if WatchProc was set to true in the [[SystemdService]] config.
    57  func c_systemd() (opentsdb.MultiDataPoint, error) {
    58  	conn, err := dbus.New()
    59  	if err != nil {
    60  		return nil, err
    61  	}
    62  	defer conn.Close()
    63  
    64  	units, err := conn.ListUnits()
    65  	if err != nil {
    66  		return nil, err
    67  	}
    68  
    69  	var md opentsdb.MultiDataPoint
    70  	for _, unit := range units {
    71  		if strings.HasSuffix(unit.Name, ".service") {
    72  			shortName := strings.TrimSuffix(unit.Name, ".service")
    73  			for _, config := range systemdServices {
    74  				if config.regex.MatchString(shortName) {
    75  					if config.watch {
    76  						err = watchSystemdServiceProc(&md, conn, unit)
    77  						if err != nil {
    78  							return nil, err
    79  						}
    80  					}
    81  					if _, ok := activeState[unit.ActiveState]; !ok {
    82  						return nil, fmt.Errorf("error: unit %s has unknown ActiveState: %s\n", unit.Name, unit.ActiveState)
    83  					}
    84  					systemdTags := opentsdb.TagSet{"name": unit.Name}
    85  					osTags := opentsdb.TagSet{"name": shortName}
    86  					Add(&md, "linux.systemd.unit.activestate", activeState[unit.ActiveState], systemdTags, metadata.Gauge, metadata.StatusCode, descActiveState)
    87  					Add(&md, osServiceRunning, util.Btoi(unit.ActiveState == "active"), osTags, metadata.Gauge, metadata.Bool, osServiceRunningDesc)
    88  				}
    89  			}
    90  		}
    91  	}
    92  
    93  	return md, err
    94  }
    95  
    96  // watchSystemdService() attempts to determine the main PID of a service and
    97  // builds a WatchedProc{} which is then sent to linuxProcMonitor()
    98  func watchSystemdServiceProc(md *opentsdb.MultiDataPoint, conn *dbus.Conn, unit dbus.UnitStatus) error {
    99  	// ExecMainPID can be non-running. MainPID is the pid of the running service.
   100  	mainPIDProp, err := conn.GetUnitTypeProperty(unit.Name, "Service", "MainPID")
   101  	if err != nil {
   102  		return err
   103  	}
   104  
   105  	mainPID, ok := mainPIDProp.Value.Value().(uint32)
   106  	if !ok {
   107  		return fmt.Errorf("Received unexpected PID type for service %s.", unit.Name)
   108  	}
   109  	// MainPID is 0 if there is no running service.
   110  	if mainPID == 0 {
   111  		return nil
   112  	}
   113  	pidStr := strconv.Itoa(int(mainPID))
   114  
   115  	cmdline, err := getLinuxCmdline(pidStr)
   116  	if err != nil {
   117  		return err
   118  	}
   119  	if cmdline == nil {
   120  		return nil
   121  	}
   122  
   123  	pidFile, err := os.Stat("/proc/" + pidStr)
   124  	if err != nil {
   125  		return err
   126  	}
   127  
   128  	proc := Process{
   129  		Pid:     pidStr,
   130  		Command: cmdline[0],
   131  		Started: pidFile.ModTime(),
   132  	}
   133  
   134  	wp := WatchedProc{
   135  		Command:   regexp.MustCompile("^" + regexp.QuoteMeta(cmdline[0]) + "$"),
   136  		Name:      strings.TrimSuffix(unit.Name, ".service"),
   137  		Processes: make(map[Process]int),
   138  		ArgMatch:  regexp.MustCompile(""),
   139  		idPool:    new(idPool)}
   140  
   141  	// Since we only have one PID per service (at the moment), this is always set to 1
   142  	wp.Processes[proc] = wp.get()
   143  
   144  	if e := linuxProcMonitor(&wp, md); e != nil {
   145  		return e
   146  	}
   147  
   148  	return err
   149  }
   150  
   151  var activeState = map[string]int{
   152  	"active":       0,
   153  	"reloading":    1,
   154  	"inactive":     2,
   155  	"failed":       3,
   156  	"activating":   4,
   157  	"deactivating": 5,
   158  }
   159  
   160  const (
   161  	descActiveState = "0: active, 1: reloading, 2: inactive, 3: failed, 4: activating, 5: deactivating"
   162  )